avx512vlintrin.h 322 KB


  1. /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to deal
  5. * in the Software without restriction, including without limitation the rights
  6. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. * copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. * THE SOFTWARE.
  20. *
  21. *===-----------------------------------------------------------------------===
  22. */
  23. #ifndef __IMMINTRIN_H
  24. #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
  25. #endif
  26. #ifndef __AVX512VLINTRIN_H
  27. #define __AVX512VLINTRIN_H
  /* Attribute set applied to the intrinsics below: always_inline, nodebug and
     an "avx512vl" target requirement. */
  #define __DEFAULT_FN_ATTRS \
    __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
  29. /* Doesn't require avx512vl, used in avx512dqintrin.h */
  30. static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
  31. _mm_setzero_di(void) {
  32. return (__m128i)(__v2di){ 0LL, 0LL};
  33. }
  34. /* Integer compare */
  /* Fixed-predicate shorthands for _mm_cmp_epi32_mask: each macro forwards its
     operands together with one _MM_CMPINT_* code. */
  #define _mm_cmpeq_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm_cmpneq_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
  #define _mm_cmplt_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
  #define _mm_cmple_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
  #define _mm_cmpgt_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
  #define _mm_cmpge_epi32_mask(a, b) \
    _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm_mask_cmp_epi32_mask. */
  #define _mm_mask_cmpeq_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm_mask_cmpneq_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm_mask_cmplt_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm_mask_cmple_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm_mask_cmpgt_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm_mask_cmpge_epi32_mask(m, a, b) \
    _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm256_cmp_epi32_mask: each macro forwards
     its operands together with one _MM_CMPINT_* code. */
  #define _mm256_cmpeq_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm256_cmpneq_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
  #define _mm256_cmplt_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
  #define _mm256_cmple_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
  #define _mm256_cmpgt_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
  #define _mm256_cmpge_epi32_mask(a, b) \
    _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm256_mask_cmp_epi32_mask. */
  #define _mm256_mask_cmpeq_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm256_mask_cmpneq_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm256_mask_cmplt_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm256_mask_cmple_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm256_mask_cmpgt_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm256_mask_cmpge_epi32_mask(m, a, b) \
    _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm_cmp_epu32_mask: each macro forwards its
     operands together with one _MM_CMPINT_* code. */
  #define _mm_cmpeq_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm_cmpneq_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
  #define _mm_cmplt_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
  #define _mm_cmple_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
  #define _mm_cmpgt_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
  #define _mm_cmpge_epu32_mask(a, b) \
    _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm_mask_cmp_epu32_mask. */
  #define _mm_mask_cmpeq_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm_mask_cmpneq_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm_mask_cmplt_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm_mask_cmple_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm_mask_cmpgt_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm_mask_cmpge_epu32_mask(m, a, b) \
    _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm256_cmp_epu32_mask: each macro forwards
     its operands together with one _MM_CMPINT_* code. */
  #define _mm256_cmpeq_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm256_cmpneq_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
  #define _mm256_cmplt_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
  #define _mm256_cmple_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
  #define _mm256_cmpgt_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
  #define _mm256_cmpge_epu32_mask(a, b) \
    _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm256_mask_cmp_epu32_mask. */
  #define _mm256_mask_cmpeq_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm256_mask_cmpneq_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm256_mask_cmplt_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm256_mask_cmple_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm256_mask_cmpgt_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm256_mask_cmpge_epu32_mask(m, a, b) \
    _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm_cmp_epi64_mask: each macro forwards its
     operands together with one _MM_CMPINT_* code. */
  #define _mm_cmpeq_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm_cmpneq_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
  #define _mm_cmplt_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
  #define _mm_cmple_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
  #define _mm_cmpgt_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
  #define _mm_cmpge_epi64_mask(a, b) \
    _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm_mask_cmp_epi64_mask. */
  #define _mm_mask_cmpeq_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm_mask_cmpneq_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm_mask_cmplt_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm_mask_cmple_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm_mask_cmpgt_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm_mask_cmpge_epi64_mask(m, a, b) \
    _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm256_cmp_epi64_mask: each macro forwards
     its operands together with one _MM_CMPINT_* code. */
  #define _mm256_cmpeq_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm256_cmpneq_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
  #define _mm256_cmplt_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
  #define _mm256_cmple_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
  #define _mm256_cmpgt_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
  #define _mm256_cmpge_epi64_mask(a, b) \
    _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm256_mask_cmp_epi64_mask. */
  #define _mm256_mask_cmpeq_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm256_mask_cmpneq_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm256_mask_cmplt_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm256_mask_cmple_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm256_mask_cmpgt_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm256_mask_cmpge_epi64_mask(m, a, b) \
    _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm_cmp_epu64_mask: each macro forwards its
     operands together with one _MM_CMPINT_* code. */
  #define _mm_cmpeq_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm_cmpneq_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
  #define _mm_cmplt_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
  #define _mm_cmple_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
  #define _mm_cmpgt_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
  #define _mm_cmpge_epu64_mask(a, b) \
    _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm_mask_cmp_epu64_mask. */
  #define _mm_mask_cmpeq_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm_mask_cmpneq_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm_mask_cmplt_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm_mask_cmple_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm_mask_cmpgt_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm_mask_cmpge_epu64_mask(m, a, b) \
    _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
  /* Fixed-predicate shorthands for _mm256_cmp_epu64_mask: each macro forwards
     its operands together with one _MM_CMPINT_* code. */
  #define _mm256_cmpeq_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
  #define _mm256_cmpneq_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
  #define _mm256_cmplt_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
  #define _mm256_cmple_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
  #define _mm256_cmpgt_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
  #define _mm256_cmpge_epu64_mask(a, b) \
    _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
  /* Same predicates, forwarding an extra mask operand to
     _mm256_mask_cmp_epu64_mask. */
  #define _mm256_mask_cmpeq_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
  #define _mm256_mask_cmpneq_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
  #define _mm256_mask_cmplt_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
  #define _mm256_mask_cmple_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
  #define _mm256_mask_cmpgt_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
  #define _mm256_mask_cmpge_epu64_mask(m, a, b) \
    _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
  227. static __inline__ __m256i __DEFAULT_FN_ATTRS
  228. _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  229. {
  230. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  231. (__v8si)_mm256_add_epi32(__A, __B),
  232. (__v8si)__W);
  233. }
  234. static __inline__ __m256i __DEFAULT_FN_ATTRS
  235. _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  236. {
  237. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  238. (__v8si)_mm256_add_epi32(__A, __B),
  239. (__v8si)_mm256_setzero_si256());
  240. }
  241. static __inline__ __m256i __DEFAULT_FN_ATTRS
  242. _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  243. {
  244. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  245. (__v4di)_mm256_add_epi64(__A, __B),
  246. (__v4di)__W);
  247. }
  248. static __inline__ __m256i __DEFAULT_FN_ATTRS
  249. _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  250. {
  251. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  252. (__v4di)_mm256_add_epi64(__A, __B),
  253. (__v4di)_mm256_setzero_si256());
  254. }
  255. static __inline__ __m256i __DEFAULT_FN_ATTRS
  256. _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  257. {
  258. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  259. (__v8si)_mm256_sub_epi32(__A, __B),
  260. (__v8si)__W);
  261. }
  262. static __inline__ __m256i __DEFAULT_FN_ATTRS
  263. _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  264. {
  265. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  266. (__v8si)_mm256_sub_epi32(__A, __B),
  267. (__v8si)_mm256_setzero_si256());
  268. }
  269. static __inline__ __m256i __DEFAULT_FN_ATTRS
  270. _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  271. {
  272. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  273. (__v4di)_mm256_sub_epi64(__A, __B),
  274. (__v4di)__W);
  275. }
  276. static __inline__ __m256i __DEFAULT_FN_ATTRS
  277. _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  278. {
  279. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  280. (__v4di)_mm256_sub_epi64(__A, __B),
  281. (__v4di)_mm256_setzero_si256());
  282. }
  283. static __inline__ __m128i __DEFAULT_FN_ATTRS
  284. _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  285. {
  286. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  287. (__v4si)_mm_add_epi32(__A, __B),
  288. (__v4si)__W);
  289. }
  290. static __inline__ __m128i __DEFAULT_FN_ATTRS
  291. _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  292. {
  293. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  294. (__v4si)_mm_add_epi32(__A, __B),
  295. (__v4si)_mm_setzero_si128());
  296. }
  297. static __inline__ __m128i __DEFAULT_FN_ATTRS
  298. _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  299. {
  300. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  301. (__v2di)_mm_add_epi64(__A, __B),
  302. (__v2di)__W);
  303. }
  304. static __inline__ __m128i __DEFAULT_FN_ATTRS
  305. _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  306. {
  307. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  308. (__v2di)_mm_add_epi64(__A, __B),
  309. (__v2di)_mm_setzero_si128());
  310. }
  311. static __inline__ __m128i __DEFAULT_FN_ATTRS
  312. _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  313. {
  314. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  315. (__v4si)_mm_sub_epi32(__A, __B),
  316. (__v4si)__W);
  317. }
  318. static __inline__ __m128i __DEFAULT_FN_ATTRS
  319. _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  320. {
  321. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  322. (__v4si)_mm_sub_epi32(__A, __B),
  323. (__v4si)_mm_setzero_si128());
  324. }
  325. static __inline__ __m128i __DEFAULT_FN_ATTRS
  326. _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  327. {
  328. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  329. (__v2di)_mm_sub_epi64(__A, __B),
  330. (__v2di)__W);
  331. }
  332. static __inline__ __m128i __DEFAULT_FN_ATTRS
  333. _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  334. {
  335. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  336. (__v2di)_mm_sub_epi64(__A, __B),
  337. (__v2di)_mm_setzero_si128());
  338. }
  339. static __inline__ __m256i __DEFAULT_FN_ATTRS
  340. _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
  341. {
  342. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  343. (__v4di)_mm256_mul_epi32(__X, __Y),
  344. (__v4di)__W);
  345. }
  346. static __inline__ __m256i __DEFAULT_FN_ATTRS
  347. _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
  348. {
  349. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  350. (__v4di)_mm256_mul_epi32(__X, __Y),
  351. (__v4di)_mm256_setzero_si256());
  352. }
  353. static __inline__ __m128i __DEFAULT_FN_ATTRS
  354. _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
  355. {
  356. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  357. (__v2di)_mm_mul_epi32(__X, __Y),
  358. (__v2di)__W);
  359. }
  360. static __inline__ __m128i __DEFAULT_FN_ATTRS
  361. _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
  362. {
  363. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  364. (__v2di)_mm_mul_epi32(__X, __Y),
  365. (__v2di)_mm_setzero_si128());
  366. }
  367. static __inline__ __m256i __DEFAULT_FN_ATTRS
  368. _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
  369. {
  370. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  371. (__v4di)_mm256_mul_epu32(__X, __Y),
  372. (__v4di)__W);
  373. }
  374. static __inline__ __m256i __DEFAULT_FN_ATTRS
  375. _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
  376. {
  377. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  378. (__v4di)_mm256_mul_epu32(__X, __Y),
  379. (__v4di)_mm256_setzero_si256());
  380. }
  381. static __inline__ __m128i __DEFAULT_FN_ATTRS
  382. _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
  383. {
  384. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  385. (__v2di)_mm_mul_epu32(__X, __Y),
  386. (__v2di)__W);
  387. }
  388. static __inline__ __m128i __DEFAULT_FN_ATTRS
  389. _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
  390. {
  391. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  392. (__v2di)_mm_mul_epu32(__X, __Y),
  393. (__v2di)_mm_setzero_si128());
  394. }
  395. static __inline__ __m256i __DEFAULT_FN_ATTRS
  396. _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
  397. {
  398. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  399. (__v8si)_mm256_mullo_epi32(__A, __B),
  400. (__v8si)_mm256_setzero_si256());
  401. }
  402. static __inline__ __m256i __DEFAULT_FN_ATTRS
  403. _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
  404. {
  405. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  406. (__v8si)_mm256_mullo_epi32(__A, __B),
  407. (__v8si)__W);
  408. }
  409. static __inline__ __m128i __DEFAULT_FN_ATTRS
  410. _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
  411. {
  412. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  413. (__v4si)_mm_mullo_epi32(__A, __B),
  414. (__v4si)_mm_setzero_si128());
  415. }
  416. static __inline__ __m128i __DEFAULT_FN_ATTRS
  417. _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
  418. {
  419. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  420. (__v4si)_mm_mullo_epi32(__A, __B),
  421. (__v4si)__W);
  422. }
  423. static __inline__ __m256i __DEFAULT_FN_ATTRS
  424. _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  425. {
  426. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  427. (__v8si)_mm256_and_si256(__A, __B),
  428. (__v8si)__W);
  429. }
  430. static __inline__ __m256i __DEFAULT_FN_ATTRS
  431. _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  432. {
  433. return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
  434. }
  435. static __inline__ __m128i __DEFAULT_FN_ATTRS
  436. _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  437. {
  438. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  439. (__v4si)_mm_and_si128(__A, __B),
  440. (__v4si)__W);
  441. }
  442. static __inline__ __m128i __DEFAULT_FN_ATTRS
  443. _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  444. {
  445. return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
  446. }
  447. static __inline__ __m256i __DEFAULT_FN_ATTRS
  448. _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  449. {
  450. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  451. (__v8si)_mm256_andnot_si256(__A, __B),
  452. (__v8si)__W);
  453. }
  454. static __inline__ __m256i __DEFAULT_FN_ATTRS
  455. _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  456. {
  457. return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
  458. __U, __A, __B);
  459. }
  460. static __inline__ __m128i __DEFAULT_FN_ATTRS
  461. _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  462. {
  463. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  464. (__v4si)_mm_andnot_si128(__A, __B),
  465. (__v4si)__W);
  466. }
  467. static __inline__ __m128i __DEFAULT_FN_ATTRS
  468. _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  469. {
  470. return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
  471. }
  472. static __inline__ __m256i __DEFAULT_FN_ATTRS
  473. _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  474. {
  475. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  476. (__v8si)_mm256_or_si256(__A, __B),
  477. (__v8si)__W);
  478. }
  479. static __inline__ __m256i __DEFAULT_FN_ATTRS
  480. _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  481. {
  482. return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
  483. }
  484. static __inline__ __m128i __DEFAULT_FN_ATTRS
  485. _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  486. {
  487. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  488. (__v4si)_mm_or_si128(__A, __B),
  489. (__v4si)__W);
  490. }
  491. static __inline__ __m128i __DEFAULT_FN_ATTRS
  492. _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  493. {
  494. return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
  495. }
  496. static __inline__ __m256i __DEFAULT_FN_ATTRS
  497. _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  498. {
  499. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  500. (__v8si)_mm256_xor_si256(__A, __B),
  501. (__v8si)__W);
  502. }
  503. static __inline__ __m256i __DEFAULT_FN_ATTRS
  504. _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  505. {
  506. return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
  507. }
  508. static __inline__ __m128i __DEFAULT_FN_ATTRS
  509. _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
  510. __m128i __B)
  511. {
  512. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  513. (__v4si)_mm_xor_si128(__A, __B),
  514. (__v4si)__W);
  515. }
  516. static __inline__ __m128i __DEFAULT_FN_ATTRS
  517. _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  518. {
  519. return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
  520. }
  521. static __inline__ __m256i __DEFAULT_FN_ATTRS
  522. _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  523. {
  524. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  525. (__v4di)_mm256_and_si256(__A, __B),
  526. (__v4di)__W);
  527. }
  528. static __inline__ __m256i __DEFAULT_FN_ATTRS
  529. _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  530. {
  531. return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
  532. }
  533. static __inline__ __m128i __DEFAULT_FN_ATTRS
  534. _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  535. {
  536. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  537. (__v2di)_mm_and_si128(__A, __B),
  538. (__v2di)__W);
  539. }
  540. static __inline__ __m128i __DEFAULT_FN_ATTRS
  541. _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  542. {
  543. return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
  544. }
  545. static __inline__ __m256i __DEFAULT_FN_ATTRS
  546. _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  547. {
  548. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  549. (__v4di)_mm256_andnot_si256(__A, __B),
  550. (__v4di)__W);
  551. }
  552. static __inline__ __m256i __DEFAULT_FN_ATTRS
  553. _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  554. {
  555. return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
  556. __U, __A, __B);
  557. }
  558. static __inline__ __m128i __DEFAULT_FN_ATTRS
  559. _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  560. {
  561. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  562. (__v2di)_mm_andnot_si128(__A, __B),
  563. (__v2di)__W);
  564. }
  565. static __inline__ __m128i __DEFAULT_FN_ATTRS
  566. _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  567. {
  568. return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
  569. }
  570. static __inline__ __m256i __DEFAULT_FN_ATTRS
  571. _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  572. {
  573. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  574. (__v4di)_mm256_or_si256(__A, __B),
  575. (__v4di)__W);
  576. }
  577. static __inline__ __m256i __DEFAULT_FN_ATTRS
  578. _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  579. {
  580. return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
  581. }
  582. static __inline__ __m128i __DEFAULT_FN_ATTRS
  583. _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  584. {
  585. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  586. (__v2di)_mm_or_si128(__A, __B),
  587. (__v2di)__W);
  588. }
  589. static __inline__ __m128i __DEFAULT_FN_ATTRS
  590. _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  591. {
  592. return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
  593. }
  594. static __inline__ __m256i __DEFAULT_FN_ATTRS
  595. _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  596. {
  597. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  598. (__v4di)_mm256_xor_si256(__A, __B),
  599. (__v4di)__W);
  600. }
  601. static __inline__ __m256i __DEFAULT_FN_ATTRS
  602. _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  603. {
  604. return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
  605. }
  606. static __inline__ __m128i __DEFAULT_FN_ATTRS
  607. _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
  608. __m128i __B)
  609. {
  610. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  611. (__v2di)_mm_xor_si128(__A, __B),
  612. (__v2di)__W);
  613. }
  614. static __inline__ __m128i __DEFAULT_FN_ATTRS
  615. _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  616. {
  617. return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
  618. }
  /* 128-bit, 32-bit-element compares. A/B are cast to __v4si, P to int; the
     unmasked forms pass (__mmask8)-1 as the mask operand, the _mask_ forms
     pass M. epi32 uses the cmpd128 builtin, epu32 the ucmpd128 builtin. */
  #define _mm_cmp_epi32_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(A), \
                                           (__v4si)(__m128i)(B), \
                                           (int)(P), (__mmask8)-1))
  #define _mm_mask_cmp_epi32_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(A), \
                                           (__v4si)(__m128i)(B), \
                                           (int)(P), (__mmask8)(M)))
  #define _mm_cmp_epu32_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm_mask_cmp_epu32_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (int)(P), (__mmask8)(M)))
  /* 256-bit, 32-bit-element compares. A/B are cast to __v8si, P to int; the
     unmasked forms pass (__mmask8)-1 as the mask operand, the _mask_ forms
     pass M. epi32 uses the cmpd256 builtin, epu32 the ucmpd256 builtin. */
  #define _mm256_cmp_epi32_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(A), \
                                           (__v8si)(__m256i)(B), \
                                           (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_epi32_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(A), \
                                           (__v8si)(__m256i)(B), \
                                           (int)(P), (__mmask8)(M)))
  #define _mm256_cmp_epu32_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_epu32_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (int)(P), (__mmask8)(M)))
  /* 128-bit, 64-bit-element compares. A/B are cast to __v2di, P to int; the
     unmasked forms pass (__mmask8)-1 as the mask operand, the _mask_ forms
     pass M. epi64 uses the cmpq128 builtin, epu64 the ucmpq128 builtin. */
  #define _mm_cmp_epi64_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(A), \
                                           (__v2di)(__m128i)(B), \
                                           (int)(P), (__mmask8)-1))
  #define _mm_mask_cmp_epi64_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(A), \
                                           (__v2di)(__m128i)(B), \
                                           (int)(P), (__mmask8)(M)))
  #define _mm_cmp_epu64_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm_mask_cmp_epu64_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (int)(P), (__mmask8)(M)))
  /* 256-bit, 64-bit-element compares. A/B are cast to __v4di, P to int; the
     unmasked forms pass (__mmask8)-1 as the mask operand, the _mask_ forms
     pass M. epi64 uses the cmpq256 builtin, epu64 the ucmpq256 builtin. */
  #define _mm256_cmp_epi64_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(A), \
                                           (__v4di)(__m256i)(B), \
                                           (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_epi64_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(A), \
                                           (__v4di)(__m256i)(B), \
                                           (int)(P), (__mmask8)(M)))
  #define _mm256_cmp_epu64_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_epu64_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (int)(P), (__mmask8)(M)))
  /* 256-bit floating-point compares. ps operands are cast to __v8sf and go to
     the cmpps256 builtin; pd operands are cast to __v4df and go to cmppd256.
     The unmasked forms pass (__mmask8)-1 as the mask operand, the _mask_
     forms pass M. */
  #define _mm256_cmp_ps_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_ps_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (int)(P), (__mmask8)(M)))
  #define _mm256_cmp_pd_mask(A, B, P) \
    ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(A), \
                                            (__v4df)(__m256d)(B), \
                                            (int)(P), (__mmask8)-1))
  #define _mm256_mask_cmp_pd_mask(M, A, B, P) \
    ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(A), \
                                            (__v4df)(__m256d)(B), \
                                            (int)(P), (__mmask8)(M)))
  /* Expands to __builtin_ia32_cmpps128_mask on a and b (viewed as __v4sf)
     with predicate p and an all-ones mask operand. */
  #define _mm_cmp_ps_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                            (__v4sf)(__m128)(b), \
                                            (int)(p), (__mmask8)-1))
  /* Same builtin as _mm_cmp_ps_mask, with the caller's mask m forwarded
     instead of all-ones. */
  #define _mm_mask_cmp_ps_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                            (__v4sf)(__m128)(b), \
                                            (int)(p), (__mmask8)(m)))
  /* Expands to __builtin_ia32_cmppd128_mask on a and b (viewed as __v2df)
     with predicate p and an all-ones mask operand. */
  #define _mm_cmp_pd_mask(a, b, p) \
    ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                            (__v2df)(__m128d)(b), \
                                            (int)(p), (__mmask8)-1))
  /* Same builtin as _mm_cmp_pd_mask, with the caller's mask m forwarded
     instead of all-ones. */
  #define _mm_mask_cmp_pd_mask(m, a, b, p) \
    ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                            (__v2df)(__m128d)(b), \
                                            (int)(p), (__mmask8)(m)))
  715. static __inline__ __m128d __DEFAULT_FN_ATTRS
  716. _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  717. {
  718. return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  719. (__v2df) __B,
  720. (__v2df) __C,
  721. (__mmask8) __U);
  722. }
  723. static __inline__ __m128d __DEFAULT_FN_ATTRS
  724. _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  725. {
  726. return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
  727. (__v2df) __B,
  728. (__v2df) __C,
  729. (__mmask8) __U);
  730. }
  731. static __inline__ __m128d __DEFAULT_FN_ATTRS
  732. _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  733. {
  734. return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  735. (__v2df) __B,
  736. (__v2df) __C,
  737. (__mmask8) __U);
  738. }
  739. static __inline__ __m128d __DEFAULT_FN_ATTRS
  740. _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  741. {
  742. return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  743. (__v2df) __B,
  744. -(__v2df) __C,
  745. (__mmask8) __U);
  746. }
  747. static __inline__ __m128d __DEFAULT_FN_ATTRS
  748. _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  749. {
  750. return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  751. (__v2df) __B,
  752. -(__v2df) __C,
  753. (__mmask8) __U);
  754. }
  755. static __inline__ __m128d __DEFAULT_FN_ATTRS
  756. _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  757. {
  758. return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
  759. (__v2df) __B,
  760. (__v2df) __C,
  761. (__mmask8) __U);
  762. }
  763. static __inline__ __m128d __DEFAULT_FN_ATTRS
  764. _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  765. {
  766. return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
  767. (__v2df) __B,
  768. (__v2df) __C,
  769. (__mmask8) __U);
  770. }
  771. static __inline__ __m128d __DEFAULT_FN_ATTRS
  772. _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  773. {
  774. return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
  775. (__v2df) __B,
  776. -(__v2df) __C,
  777. (__mmask8) __U);
  778. }
  779. static __inline__ __m256d __DEFAULT_FN_ATTRS
  780. _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  781. {
  782. return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  783. (__v4df) __B,
  784. (__v4df) __C,
  785. (__mmask8) __U);
  786. }
  787. static __inline__ __m256d __DEFAULT_FN_ATTRS
  788. _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  789. {
  790. return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
  791. (__v4df) __B,
  792. (__v4df) __C,
  793. (__mmask8) __U);
  794. }
  795. static __inline__ __m256d __DEFAULT_FN_ATTRS
  796. _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  797. {
  798. return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  799. (__v4df) __B,
  800. (__v4df) __C,
  801. (__mmask8) __U);
  802. }
  803. static __inline__ __m256d __DEFAULT_FN_ATTRS
  804. _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  805. {
  806. return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  807. (__v4df) __B,
  808. -(__v4df) __C,
  809. (__mmask8) __U);
  810. }
  811. static __inline__ __m256d __DEFAULT_FN_ATTRS
  812. _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  813. {
  814. return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  815. (__v4df) __B,
  816. -(__v4df) __C,
  817. (__mmask8) __U);
  818. }
  819. static __inline__ __m256d __DEFAULT_FN_ATTRS
  820. _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  821. {
  822. return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
  823. (__v4df) __B,
  824. (__v4df) __C,
  825. (__mmask8) __U);
  826. }
  827. static __inline__ __m256d __DEFAULT_FN_ATTRS
  828. _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  829. {
  830. return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
  831. (__v4df) __B,
  832. (__v4df) __C,
  833. (__mmask8) __U);
  834. }
  835. static __inline__ __m256d __DEFAULT_FN_ATTRS
  836. _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  837. {
  838. return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
  839. (__v4df) __B,
  840. -(__v4df) __C,
  841. (__mmask8) __U);
  842. }
  843. static __inline__ __m128 __DEFAULT_FN_ATTRS
  844. _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  845. {
  846. return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  847. (__v4sf) __B,
  848. (__v4sf) __C,
  849. (__mmask8) __U);
  850. }
  851. static __inline__ __m128 __DEFAULT_FN_ATTRS
  852. _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  853. {
  854. return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
  855. (__v4sf) __B,
  856. (__v4sf) __C,
  857. (__mmask8) __U);
  858. }
  859. static __inline__ __m128 __DEFAULT_FN_ATTRS
  860. _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  861. {
  862. return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  863. (__v4sf) __B,
  864. (__v4sf) __C,
  865. (__mmask8) __U);
  866. }
  867. static __inline__ __m128 __DEFAULT_FN_ATTRS
  868. _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  869. {
  870. return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  871. (__v4sf) __B,
  872. -(__v4sf) __C,
  873. (__mmask8) __U);
  874. }
  875. static __inline__ __m128 __DEFAULT_FN_ATTRS
  876. _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  877. {
  878. return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  879. (__v4sf) __B,
  880. -(__v4sf) __C,
  881. (__mmask8) __U);
  882. }
  883. static __inline__ __m128 __DEFAULT_FN_ATTRS
  884. _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  885. {
  886. return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
  887. (__v4sf) __B,
  888. (__v4sf) __C,
  889. (__mmask8) __U);
  890. }
  891. static __inline__ __m128 __DEFAULT_FN_ATTRS
  892. _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  893. {
  894. return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
  895. (__v4sf) __B,
  896. (__v4sf) __C,
  897. (__mmask8) __U);
  898. }
  899. static __inline__ __m128 __DEFAULT_FN_ATTRS
  900. _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  901. {
  902. return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
  903. (__v4sf) __B,
  904. -(__v4sf) __C,
  905. (__mmask8) __U);
  906. }
  907. static __inline__ __m256 __DEFAULT_FN_ATTRS
  908. _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  909. {
  910. return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  911. (__v8sf) __B,
  912. (__v8sf) __C,
  913. (__mmask8) __U);
  914. }
  915. static __inline__ __m256 __DEFAULT_FN_ATTRS
  916. _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  917. {
  918. return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
  919. (__v8sf) __B,
  920. (__v8sf) __C,
  921. (__mmask8) __U);
  922. }
  923. static __inline__ __m256 __DEFAULT_FN_ATTRS
  924. _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  925. {
  926. return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  927. (__v8sf) __B,
  928. (__v8sf) __C,
  929. (__mmask8) __U);
  930. }
  931. static __inline__ __m256 __DEFAULT_FN_ATTRS
  932. _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  933. {
  934. return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  935. (__v8sf) __B,
  936. -(__v8sf) __C,
  937. (__mmask8) __U);
  938. }
  939. static __inline__ __m256 __DEFAULT_FN_ATTRS
  940. _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  941. {
  942. return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  943. (__v8sf) __B,
  944. -(__v8sf) __C,
  945. (__mmask8) __U);
  946. }
  947. static __inline__ __m256 __DEFAULT_FN_ATTRS
  948. _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  949. {
  950. return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
  951. (__v8sf) __B,
  952. (__v8sf) __C,
  953. (__mmask8) __U);
  954. }
  955. static __inline__ __m256 __DEFAULT_FN_ATTRS
  956. _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  957. {
  958. return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
  959. (__v8sf) __B,
  960. (__v8sf) __C,
  961. (__mmask8) __U);
  962. }
  963. static __inline__ __m256 __DEFAULT_FN_ATTRS
  964. _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  965. {
  966. return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
  967. (__v8sf) __B,
  968. -(__v8sf) __C,
  969. (__mmask8) __U);
  970. }
  971. static __inline__ __m128d __DEFAULT_FN_ATTRS
  972. _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  973. {
  974. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  975. (__v2df) __B,
  976. (__v2df) __C,
  977. (__mmask8) __U);
  978. }
  979. static __inline__ __m128d __DEFAULT_FN_ATTRS
  980. _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  981. {
  982. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
  983. (__v2df) __B,
  984. (__v2df) __C,
  985. (__mmask8)
  986. __U);
  987. }
  988. static __inline__ __m128d __DEFAULT_FN_ATTRS
  989. _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  990. {
  991. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  992. (__v2df) __B,
  993. (__v2df) __C,
  994. (__mmask8)
  995. __U);
  996. }
  997. static __inline__ __m128d __DEFAULT_FN_ATTRS
  998. _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  999. {
  1000. return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  1001. (__v2df) __B,
  1002. -(__v2df) __C,
  1003. (__mmask8) __U);
  1004. }
  1005. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1006. _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
  1007. {
  1008. return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  1009. (__v2df) __B,
  1010. -(__v2df) __C,
  1011. (__mmask8)
  1012. __U);
  1013. }
  1014. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1015. _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  1016. {
  1017. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  1018. (__v4df) __B,
  1019. (__v4df) __C,
  1020. (__mmask8) __U);
  1021. }
  1022. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1023. _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  1024. {
  1025. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
  1026. (__v4df) __B,
  1027. (__v4df) __C,
  1028. (__mmask8)
  1029. __U);
  1030. }
  1031. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1032. _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  1033. {
  1034. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  1035. (__v4df) __B,
  1036. (__v4df) __C,
  1037. (__mmask8)
  1038. __U);
  1039. }
  1040. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1041. _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  1042. {
  1043. return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  1044. (__v4df) __B,
  1045. -(__v4df) __C,
  1046. (__mmask8) __U);
  1047. }
  1048. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1049. _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
  1050. {
  1051. return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  1052. (__v4df) __B,
  1053. -(__v4df) __C,
  1054. (__mmask8)
  1055. __U);
  1056. }
  1057. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1058. _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  1059. {
  1060. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  1061. (__v4sf) __B,
  1062. (__v4sf) __C,
  1063. (__mmask8) __U);
  1064. }
  1065. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1066. _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  1067. {
  1068. return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
  1069. (__v4sf) __B,
  1070. (__v4sf) __C,
  1071. (__mmask8) __U);
  1072. }
  1073. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1074. _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  1075. {
  1076. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  1077. (__v4sf) __B,
  1078. (__v4sf) __C,
  1079. (__mmask8) __U);
  1080. }
  1081. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1082. _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  1083. {
  1084. return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  1085. (__v4sf) __B,
  1086. -(__v4sf) __C,
  1087. (__mmask8) __U);
  1088. }
  1089. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1090. _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
  1091. {
  1092. return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  1093. (__v4sf) __B,
  1094. -(__v4sf) __C,
  1095. (__mmask8) __U);
  1096. }
  1097. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1098. _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
  1099. __m256 __C)
  1100. {
  1101. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  1102. (__v8sf) __B,
  1103. (__v8sf) __C,
  1104. (__mmask8) __U);
  1105. }
  1106. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1107. _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  1108. {
  1109. return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
  1110. (__v8sf) __B,
  1111. (__v8sf) __C,
  1112. (__mmask8) __U);
  1113. }
  1114. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1115. _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  1116. {
  1117. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  1118. (__v8sf) __B,
  1119. (__v8sf) __C,
  1120. (__mmask8) __U);
  1121. }
  1122. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1123. _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  1124. {
  1125. return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  1126. (__v8sf) __B,
  1127. -(__v8sf) __C,
  1128. (__mmask8) __U);
  1129. }
  1130. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1131. _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
  1132. {
  1133. return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  1134. (__v8sf) __B,
  1135. -(__v8sf) __C,
  1136. (__mmask8) __U);
  1137. }
  1138. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1139. _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  1140. {
  1141. return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
  1142. (__v2df) __B,
  1143. (__v2df) __C,
  1144. (__mmask8) __U);
  1145. }
  1146. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1147. _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  1148. {
  1149. return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
  1150. (__v4df) __B,
  1151. (__v4df) __C,
  1152. (__mmask8) __U);
  1153. }
  1154. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1155. _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  1156. {
  1157. return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
  1158. (__v4sf) __B,
  1159. (__v4sf) __C,
  1160. (__mmask8) __U);
  1161. }
  1162. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1163. _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  1164. {
  1165. return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
  1166. (__v8sf) __B,
  1167. (__v8sf) __C,
  1168. (__mmask8) __U);
  1169. }
  1170. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1171. _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  1172. {
  1173. return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
  1174. (__v2df) __B,
  1175. (__v2df) __C,
  1176. (__mmask8)
  1177. __U);
  1178. }
  1179. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1180. _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  1181. {
  1182. return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
  1183. (__v4df) __B,
  1184. (__v4df) __C,
  1185. (__mmask8)
  1186. __U);
  1187. }
  1188. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1189. _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  1190. {
  1191. return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
  1192. (__v4sf) __B,
  1193. (__v4sf) __C,
  1194. (__mmask8) __U);
  1195. }
  1196. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1197. _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  1198. {
  1199. return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
  1200. (__v8sf) __B,
  1201. (__v8sf) __C,
  1202. (__mmask8) __U);
  1203. }
  1204. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1205. _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  1206. {
  1207. return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
  1208. (__v2df) __B,
  1209. (__v2df) __C,
  1210. (__mmask8) __U);
  1211. }
  1212. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1213. _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  1214. {
  1215. return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
  1216. (__v4df) __B,
  1217. (__v4df) __C,
  1218. (__mmask8) __U);
  1219. }
  1220. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1221. _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  1222. {
  1223. return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
  1224. (__v4sf) __B,
  1225. (__v4sf) __C,
  1226. (__mmask8) __U);
  1227. }
  1228. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1229. _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  1230. {
  1231. return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
  1232. (__v8sf) __B,
  1233. (__v8sf) __C,
  1234. (__mmask8) __U);
  1235. }
  1236. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1237. _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
  1238. {
  1239. return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
  1240. (__v2df) __B,
  1241. (__v2df) __C,
  1242. (__mmask8) __U);
  1243. }
  1244. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1245. _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
  1246. {
  1247. return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
  1248. (__v2df) __B,
  1249. (__v2df) __C,
  1250. (__mmask8) __U);
  1251. }
  1252. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1253. _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
  1254. {
  1255. return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
  1256. (__v4df) __B,
  1257. (__v4df) __C,
  1258. (__mmask8) __U);
  1259. }
  1260. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1261. _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
  1262. {
  1263. return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
  1264. (__v4df) __B,
  1265. (__v4df) __C,
  1266. (__mmask8) __U);
  1267. }
  1268. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1269. _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
  1270. {
  1271. return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
  1272. (__v4sf) __B,
  1273. (__v4sf) __C,
  1274. (__mmask8) __U);
  1275. }
  1276. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1277. _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
  1278. {
  1279. return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
  1280. (__v4sf) __B,
  1281. (__v4sf) __C,
  1282. (__mmask8) __U);
  1283. }
  1284. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1285. _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
  1286. {
  1287. return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
  1288. (__v8sf) __B,
  1289. (__v8sf) __C,
  1290. (__mmask8) __U);
  1291. }
  1292. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1293. _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
  1294. {
  1295. return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
  1296. (__v8sf) __B,
  1297. (__v8sf) __C,
  1298. (__mmask8) __U);
  1299. }
  1300. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1301. _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  1302. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  1303. (__v2df)_mm_add_pd(__A, __B),
  1304. (__v2df)__W);
  1305. }
  1306. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1307. _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  1308. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  1309. (__v2df)_mm_add_pd(__A, __B),
  1310. (__v2df)_mm_setzero_pd());
  1311. }
  1312. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1313. _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  1314. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  1315. (__v4df)_mm256_add_pd(__A, __B),
  1316. (__v4df)__W);
  1317. }
  1318. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1319. _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  1320. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  1321. (__v4df)_mm256_add_pd(__A, __B),
  1322. (__v4df)_mm256_setzero_pd());
  1323. }
  1324. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1325. _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  1326. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  1327. (__v4sf)_mm_add_ps(__A, __B),
  1328. (__v4sf)__W);
  1329. }
  1330. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1331. _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  1332. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  1333. (__v4sf)_mm_add_ps(__A, __B),
  1334. (__v4sf)_mm_setzero_ps());
  1335. }
  1336. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1337. _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  1338. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  1339. (__v8sf)_mm256_add_ps(__A, __B),
  1340. (__v8sf)__W);
  1341. }
  1342. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1343. _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  1344. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  1345. (__v8sf)_mm256_add_ps(__A, __B),
  1346. (__v8sf)_mm256_setzero_ps());
  1347. }
  1348. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1349. _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
  1350. return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
  1351. (__v4si) __W,
  1352. (__v4si) __A);
  1353. }
  1354. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1355. _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
  1356. return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
  1357. (__v8si) __W,
  1358. (__v8si) __A);
  1359. }
  1360. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1361. _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
  1362. return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
  1363. (__v2df) __W,
  1364. (__v2df) __A);
  1365. }
  1366. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1367. _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
  1368. return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
  1369. (__v4df) __W,
  1370. (__v4df) __A);
  1371. }
  1372. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1373. _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
  1374. return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
  1375. (__v4sf) __W,
  1376. (__v4sf) __A);
  1377. }
  1378. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1379. _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
  1380. return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
  1381. (__v8sf) __W,
  1382. (__v8sf) __A);
  1383. }
  1384. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1385. _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
  1386. return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
  1387. (__v2di) __W,
  1388. (__v2di) __A);
  1389. }
  1390. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1391. _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
  1392. return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
  1393. (__v4di) __W,
  1394. (__v4di) __A);
  1395. }
  1396. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1397. _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
  1398. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  1399. (__v2df) __W,
  1400. (__mmask8) __U);
  1401. }
  1402. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1403. _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
  1404. return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  1405. (__v2df)
  1406. _mm_setzero_pd (),
  1407. (__mmask8) __U);
  1408. }
  1409. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1410. _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
  1411. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  1412. (__v4df) __W,
  1413. (__mmask8) __U);
  1414. }
  1415. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1416. _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
  1417. return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  1418. (__v4df)
  1419. _mm256_setzero_pd (),
  1420. (__mmask8) __U);
  1421. }
  1422. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1423. _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
  1424. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  1425. (__v2di) __W,
  1426. (__mmask8) __U);
  1427. }
  1428. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1429. _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
  1430. return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  1431. (__v2di)
  1432. _mm_setzero_si128 (),
  1433. (__mmask8) __U);
  1434. }
  1435. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1436. _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
  1437. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  1438. (__v4di) __W,
  1439. (__mmask8) __U);
  1440. }
  1441. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1442. _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
  1443. return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  1444. (__v4di)
  1445. _mm256_setzero_si256 (),
  1446. (__mmask8) __U);
  1447. }
  1448. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1449. _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
  1450. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  1451. (__v4sf) __W,
  1452. (__mmask8) __U);
  1453. }
  1454. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1455. _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
  1456. return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  1457. (__v4sf)
  1458. _mm_setzero_ps (),
  1459. (__mmask8) __U);
  1460. }
  1461. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1462. _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
  1463. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  1464. (__v8sf) __W,
  1465. (__mmask8) __U);
  1466. }
  1467. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1468. _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
  1469. return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  1470. (__v8sf)
  1471. _mm256_setzero_ps (),
  1472. (__mmask8) __U);
  1473. }
  1474. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1475. _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
  1476. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  1477. (__v4si) __W,
  1478. (__mmask8) __U);
  1479. }
  1480. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1481. _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
  1482. return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  1483. (__v4si)
  1484. _mm_setzero_si128 (),
  1485. (__mmask8) __U);
  1486. }
  1487. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1488. _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
  1489. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  1490. (__v8si) __W,
  1491. (__mmask8) __U);
  1492. }
  1493. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1494. _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
  1495. return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  1496. (__v8si)
  1497. _mm256_setzero_si256 (),
  1498. (__mmask8) __U);
  1499. }
  1500. static __inline__ void __DEFAULT_FN_ATTRS
  1501. _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
  1502. __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
  1503. (__v2df) __A,
  1504. (__mmask8) __U);
  1505. }
  1506. static __inline__ void __DEFAULT_FN_ATTRS
  1507. _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
  1508. __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
  1509. (__v4df) __A,
  1510. (__mmask8) __U);
  1511. }
  1512. static __inline__ void __DEFAULT_FN_ATTRS
  1513. _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
  1514. __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
  1515. (__v2di) __A,
  1516. (__mmask8) __U);
  1517. }
  1518. static __inline__ void __DEFAULT_FN_ATTRS
  1519. _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
  1520. __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
  1521. (__v4di) __A,
  1522. (__mmask8) __U);
  1523. }
  1524. static __inline__ void __DEFAULT_FN_ATTRS
  1525. _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
  1526. __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
  1527. (__v4sf) __A,
  1528. (__mmask8) __U);
  1529. }
  1530. static __inline__ void __DEFAULT_FN_ATTRS
  1531. _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
  1532. __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
  1533. (__v8sf) __A,
  1534. (__mmask8) __U);
  1535. }
  1536. static __inline__ void __DEFAULT_FN_ATTRS
  1537. _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
  1538. __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
  1539. (__v4si) __A,
  1540. (__mmask8) __U);
  1541. }
  1542. static __inline__ void __DEFAULT_FN_ATTRS
  1543. _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
  1544. __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
  1545. (__v8si) __A,
  1546. (__mmask8) __U);
  1547. }
  1548. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1549. _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  1550. return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  1551. (__v2df)_mm_cvtepi32_pd(__A),
  1552. (__v2df)__W);
  1553. }
  1554. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1555. _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
  1556. return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  1557. (__v2df)_mm_cvtepi32_pd(__A),
  1558. (__v2df)_mm_setzero_pd());
  1559. }
  1560. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1561. _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
  1562. return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  1563. (__v4df)_mm256_cvtepi32_pd(__A),
  1564. (__v4df)__W);
  1565. }
  1566. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1567. _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
  1568. return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  1569. (__v4df)_mm256_cvtepi32_pd(__A),
  1570. (__v4df)_mm256_setzero_pd());
  1571. }
  1572. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1573. _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  1574. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1575. (__v4sf) __W,
  1576. (__mmask8) __U);
  1577. }
  1578. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1579. _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
  1580. return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  1581. (__v4sf)
  1582. _mm_setzero_ps (),
  1583. (__mmask8) __U);
  1584. }
  1585. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1586. _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
  1587. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1588. (__v8sf) __W,
  1589. (__mmask8) __U);
  1590. }
  1591. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1592. _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
  1593. return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  1594. (__v8sf)
  1595. _mm256_setzero_ps (),
  1596. (__mmask8) __U);
  1597. }
  1598. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1599. _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
  1600. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1601. (__v4si) __W,
  1602. (__mmask8) __U);
  1603. }
  1604. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1605. _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
  1606. return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  1607. (__v4si)
  1608. _mm_setzero_si128 (),
  1609. (__mmask8) __U);
  1610. }
  1611. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1612. _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
  1613. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1614. (__v4si) __W,
  1615. (__mmask8) __U);
  1616. }
  1617. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1618. _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
  1619. return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  1620. (__v4si)
  1621. _mm_setzero_si128 (),
  1622. (__mmask8) __U);
  1623. }
  1624. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1625. _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
  1626. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  1627. (__v4sf) __W,
  1628. (__mmask8) __U);
  1629. }
  1630. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1631. _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
  1632. return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  1633. (__v4sf)
  1634. _mm_setzero_ps (),
  1635. (__mmask8) __U);
  1636. }
  1637. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1638. _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
  1639. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  1640. (__v4sf) __W,
  1641. (__mmask8) __U);
  1642. }
  1643. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1644. _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
  1645. return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  1646. (__v4sf)
  1647. _mm_setzero_ps (),
  1648. (__mmask8) __U);
  1649. }
  1650. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1651. _mm_cvtpd_epu32 (__m128d __A) {
  1652. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  1653. (__v4si)
  1654. _mm_setzero_si128 (),
  1655. (__mmask8) -1);
  1656. }
  1657. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1658. _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
  1659. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  1660. (__v4si) __W,
  1661. (__mmask8) __U);
  1662. }
  1663. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1664. _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
  1665. return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  1666. (__v4si)
  1667. _mm_setzero_si128 (),
  1668. (__mmask8) __U);
  1669. }
  1670. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1671. _mm256_cvtpd_epu32 (__m256d __A) {
  1672. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  1673. (__v4si)
  1674. _mm_setzero_si128 (),
  1675. (__mmask8) -1);
  1676. }
  1677. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1678. _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
  1679. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  1680. (__v4si) __W,
  1681. (__mmask8) __U);
  1682. }
  1683. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1684. _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
  1685. return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  1686. (__v4si)
  1687. _mm_setzero_si128 (),
  1688. (__mmask8) __U);
  1689. }
  1690. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1691. _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
  1692. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  1693. (__v4si) __W,
  1694. (__mmask8) __U);
  1695. }
  1696. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1697. _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
  1698. return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  1699. (__v4si)
  1700. _mm_setzero_si128 (),
  1701. (__mmask8) __U);
  1702. }
  1703. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1704. _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
  1705. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  1706. (__v8si) __W,
  1707. (__mmask8) __U);
  1708. }
  1709. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1710. _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
  1711. return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  1712. (__v8si)
  1713. _mm256_setzero_si256 (),
  1714. (__mmask8) __U);
  1715. }
  1716. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1717. _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
  1718. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1719. (__v2df) __W,
  1720. (__mmask8) __U);
  1721. }
  1722. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1723. _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
  1724. return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  1725. (__v2df)
  1726. _mm_setzero_pd (),
  1727. (__mmask8) __U);
  1728. }
  1729. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1730. _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
  1731. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1732. (__v4df) __W,
  1733. (__mmask8) __U);
  1734. }
  1735. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1736. _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
  1737. return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  1738. (__v4df)
  1739. _mm256_setzero_pd (),
  1740. (__mmask8) __U);
  1741. }
  1742. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1743. _mm_cvtps_epu32 (__m128 __A) {
  1744. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  1745. (__v4si)
  1746. _mm_setzero_si128 (),
  1747. (__mmask8) -1);
  1748. }
  1749. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1750. _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
  1751. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  1752. (__v4si) __W,
  1753. (__mmask8) __U);
  1754. }
  1755. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1756. _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
  1757. return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  1758. (__v4si)
  1759. _mm_setzero_si128 (),
  1760. (__mmask8) __U);
  1761. }
  1762. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1763. _mm256_cvtps_epu32 (__m256 __A) {
  1764. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  1765. (__v8si)
  1766. _mm256_setzero_si256 (),
  1767. (__mmask8) -1);
  1768. }
  1769. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1770. _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
  1771. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  1772. (__v8si) __W,
  1773. (__mmask8) __U);
  1774. }
  1775. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1776. _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
  1777. return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  1778. (__v8si)
  1779. _mm256_setzero_si256 (),
  1780. (__mmask8) __U);
  1781. }
  1782. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1783. _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
  1784. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  1785. (__v4si) __W,
  1786. (__mmask8) __U);
  1787. }
  1788. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1789. _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
  1790. return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  1791. (__v4si)
  1792. _mm_setzero_si128 (),
  1793. (__mmask8) __U);
  1794. }
  1795. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1796. _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
  1797. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  1798. (__v4si) __W,
  1799. (__mmask8) __U);
  1800. }
  1801. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1802. _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
  1803. return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  1804. (__v4si)
  1805. _mm_setzero_si128 (),
  1806. (__mmask8) __U);
  1807. }
  1808. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1809. _mm_cvttpd_epu32 (__m128d __A) {
  1810. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1811. (__v4si)
  1812. _mm_setzero_si128 (),
  1813. (__mmask8) -1);
  1814. }
  1815. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1816. _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
  1817. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1818. (__v4si) __W,
  1819. (__mmask8) __U);
  1820. }
  1821. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1822. _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
  1823. return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  1824. (__v4si)
  1825. _mm_setzero_si128 (),
  1826. (__mmask8) __U);
  1827. }
  1828. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1829. _mm256_cvttpd_epu32 (__m256d __A) {
  1830. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1831. (__v4si)
  1832. _mm_setzero_si128 (),
  1833. (__mmask8) -1);
  1834. }
  1835. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1836. _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
  1837. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1838. (__v4si) __W,
  1839. (__mmask8) __U);
  1840. }
  1841. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1842. _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
  1843. return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  1844. (__v4si)
  1845. _mm_setzero_si128 (),
  1846. (__mmask8) __U);
  1847. }
  1848. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1849. _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
  1850. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  1851. (__v4si) __W,
  1852. (__mmask8) __U);
  1853. }
  1854. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1855. _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
  1856. return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  1857. (__v4si)
  1858. _mm_setzero_si128 (),
  1859. (__mmask8) __U);
  1860. }
  1861. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1862. _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
  1863. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  1864. (__v8si) __W,
  1865. (__mmask8) __U);
  1866. }
  1867. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1868. _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
  1869. return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  1870. (__v8si)
  1871. _mm256_setzero_si256 (),
  1872. (__mmask8) __U);
  1873. }
  1874. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1875. _mm_cvttps_epu32 (__m128 __A) {
  1876. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1877. (__v4si)
  1878. _mm_setzero_si128 (),
  1879. (__mmask8) -1);
  1880. }
  1881. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1882. _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
  1883. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1884. (__v4si) __W,
  1885. (__mmask8) __U);
  1886. }
  1887. static __inline__ __m128i __DEFAULT_FN_ATTRS
  1888. _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
  1889. return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  1890. (__v4si)
  1891. _mm_setzero_si128 (),
  1892. (__mmask8) __U);
  1893. }
  1894. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1895. _mm256_cvttps_epu32 (__m256 __A) {
  1896. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1897. (__v8si)
  1898. _mm256_setzero_si256 (),
  1899. (__mmask8) -1);
  1900. }
  1901. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1902. _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
  1903. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1904. (__v8si) __W,
  1905. (__mmask8) __U);
  1906. }
  1907. static __inline__ __m256i __DEFAULT_FN_ATTRS
  1908. _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
  1909. return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  1910. (__v8si)
  1911. _mm256_setzero_si256 (),
  1912. (__mmask8) __U);
  1913. }
  1914. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1915. _mm_cvtepu32_pd (__m128i __A) {
  1916. return (__m128d) __builtin_convertvector(
  1917. __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
  1918. }
  1919. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1920. _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  1921. return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  1922. (__v2df)_mm_cvtepu32_pd(__A),
  1923. (__v2df)__W);
  1924. }
  1925. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1926. _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
  1927. return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  1928. (__v2df)_mm_cvtepu32_pd(__A),
  1929. (__v2df)_mm_setzero_pd());
  1930. }
  1931. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1932. _mm256_cvtepu32_pd (__m128i __A) {
  1933. return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
  1934. }
  1935. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1936. _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
  1937. return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  1938. (__v4df)_mm256_cvtepu32_pd(__A),
  1939. (__v4df)__W);
  1940. }
  1941. static __inline__ __m256d __DEFAULT_FN_ATTRS
  1942. _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
  1943. return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  1944. (__v4df)_mm256_cvtepu32_pd(__A),
  1945. (__v4df)_mm256_setzero_pd());
  1946. }
  1947. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1948. _mm_cvtepu32_ps (__m128i __A) {
  1949. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1950. (__v4sf)
  1951. _mm_setzero_ps (),
  1952. (__mmask8) -1);
  1953. }
  1954. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1955. _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  1956. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1957. (__v4sf) __W,
  1958. (__mmask8) __U);
  1959. }
  1960. static __inline__ __m128 __DEFAULT_FN_ATTRS
  1961. _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
  1962. return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  1963. (__v4sf)
  1964. _mm_setzero_ps (),
  1965. (__mmask8) __U);
  1966. }
  1967. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1968. _mm256_cvtepu32_ps (__m256i __A) {
  1969. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1970. (__v8sf)
  1971. _mm256_setzero_ps (),
  1972. (__mmask8) -1);
  1973. }
  1974. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1975. _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
  1976. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1977. (__v8sf) __W,
  1978. (__mmask8) __U);
  1979. }
  1980. static __inline__ __m256 __DEFAULT_FN_ATTRS
  1981. _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
  1982. return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  1983. (__v8sf)
  1984. _mm256_setzero_ps (),
  1985. (__mmask8) __U);
  1986. }
  1987. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1988. _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  1989. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  1990. (__v2df)_mm_div_pd(__A, __B),
  1991. (__v2df)__W);
  1992. }
  1993. static __inline__ __m128d __DEFAULT_FN_ATTRS
  1994. _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  1995. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  1996. (__v2df)_mm_div_pd(__A, __B),
  1997. (__v2df)_mm_setzero_pd());
  1998. }
  1999. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2000. _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  2001. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2002. (__v4df)_mm256_div_pd(__A, __B),
  2003. (__v4df)__W);
  2004. }
  2005. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2006. _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  2007. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2008. (__v4df)_mm256_div_pd(__A, __B),
  2009. (__v4df)_mm256_setzero_pd());
  2010. }
  2011. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2012. _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  2013. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2014. (__v4sf)_mm_div_ps(__A, __B),
  2015. (__v4sf)__W);
  2016. }
  2017. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2018. _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  2019. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2020. (__v4sf)_mm_div_ps(__A, __B),
  2021. (__v4sf)_mm_setzero_ps());
  2022. }
  2023. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2024. _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  2025. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2026. (__v8sf)_mm256_div_ps(__A, __B),
  2027. (__v8sf)__W);
  2028. }
  2029. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2030. _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  2031. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2032. (__v8sf)_mm256_div_ps(__A, __B),
  2033. (__v8sf)_mm256_setzero_ps());
  2034. }
  2035. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2036. _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
  2037. return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  2038. (__v2df) __W,
  2039. (__mmask8) __U);
  2040. }
  2041. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2042. _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
  2043. return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  2044. (__v2df)
  2045. _mm_setzero_pd (),
  2046. (__mmask8) __U);
  2047. }
  2048. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2049. _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
  2050. return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  2051. (__v4df) __W,
  2052. (__mmask8) __U);
  2053. }
  2054. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2055. _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
  2056. return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  2057. (__v4df)
  2058. _mm256_setzero_pd (),
  2059. (__mmask8) __U);
  2060. }
  2061. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2062. _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
  2063. return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  2064. (__v2di) __W,
  2065. (__mmask8) __U);
  2066. }
  2067. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2068. _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
  2069. return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  2070. (__v2di)
  2071. _mm_setzero_si128 (),
  2072. (__mmask8) __U);
  2073. }
  2074. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2075. _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
  2076. return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  2077. (__v4di) __W,
  2078. (__mmask8) __U);
  2079. }
  2080. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2081. _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
  2082. return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  2083. (__v4di)
  2084. _mm256_setzero_si256 (),
  2085. (__mmask8) __U);
  2086. }
  2087. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2088. _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
  2089. return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  2090. (__v2df) __W,
  2091. (__mmask8)
  2092. __U);
  2093. }
  2094. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2095. _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
  2096. return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  2097. (__v2df)
  2098. _mm_setzero_pd (),
  2099. (__mmask8)
  2100. __U);
  2101. }
  2102. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2103. _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
  2104. return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  2105. (__v4df) __W,
  2106. (__mmask8)
  2107. __U);
  2108. }
  2109. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2110. _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
  2111. return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  2112. (__v4df)
  2113. _mm256_setzero_pd (),
  2114. (__mmask8)
  2115. __U);
  2116. }
  2117. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2118. _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
  2119. return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  2120. (__v2di) __W,
  2121. (__mmask8)
  2122. __U);
  2123. }
  2124. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2125. _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
  2126. return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  2127. (__v2di)
  2128. _mm_setzero_si128 (),
  2129. (__mmask8)
  2130. __U);
  2131. }
  2132. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2133. _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
  2134. void const *__P) {
  2135. return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  2136. (__v4di) __W,
  2137. (__mmask8)
  2138. __U);
  2139. }
  2140. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2141. _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
  2142. return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  2143. (__v4di)
  2144. _mm256_setzero_si256 (),
  2145. (__mmask8)
  2146. __U);
  2147. }
  2148. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2149. _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
  2150. return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  2151. (__v4sf) __W,
  2152. (__mmask8) __U);
  2153. }
  2154. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2155. _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
  2156. return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  2157. (__v4sf)
  2158. _mm_setzero_ps (),
  2159. (__mmask8)
  2160. __U);
  2161. }
  2162. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2163. _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
  2164. return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  2165. (__v8sf) __W,
  2166. (__mmask8) __U);
  2167. }
  2168. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2169. _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
  2170. return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  2171. (__v8sf)
  2172. _mm256_setzero_ps (),
  2173. (__mmask8)
  2174. __U);
  2175. }
  2176. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2177. _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
  2178. return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  2179. (__v4si) __W,
  2180. (__mmask8)
  2181. __U);
  2182. }
  2183. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2184. _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
  2185. return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  2186. (__v4si)
  2187. _mm_setzero_si128 (),
  2188. (__mmask8) __U);
  2189. }
  2190. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2191. _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
  2192. void const *__P) {
  2193. return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  2194. (__v8si) __W,
  2195. (__mmask8)
  2196. __U);
  2197. }
  2198. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2199. _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
  2200. return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  2201. (__v8si)
  2202. _mm256_setzero_si256 (),
  2203. (__mmask8)
  2204. __U);
  2205. }
  2206. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2207. _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
  2208. return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  2209. (__v4sf) __W,
  2210. (__mmask8) __U);
  2211. }
  2212. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2213. _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
  2214. return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  2215. (__v4sf)
  2216. _mm_setzero_ps (),
  2217. (__mmask8) __U);
  2218. }
  2219. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2220. _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
  2221. return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  2222. (__v8sf) __W,
  2223. (__mmask8) __U);
  2224. }
  2225. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2226. _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
  2227. return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  2228. (__v8sf)
  2229. _mm256_setzero_ps (),
  2230. (__mmask8) __U);
  2231. }
  2232. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2233. _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
  2234. return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  2235. (__v4si) __W,
  2236. (__mmask8) __U);
  2237. }
  2238. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2239. _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
  2240. return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  2241. (__v4si)
  2242. _mm_setzero_si128 (),
  2243. (__mmask8) __U);
  2244. }
  2245. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2246. _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
  2247. return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
  2248. (__v8si) __W,
  2249. (__mmask8) __U);
  2250. }
  2251. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2252. _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
  2253. return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
  2254. (__v8si)
  2255. _mm256_setzero_si256 (),
  2256. (__mmask8) __U);
  2257. }
  2258. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2259. _mm_getexp_pd (__m128d __A) {
  2260. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  2261. (__v2df)
  2262. _mm_setzero_pd (),
  2263. (__mmask8) -1);
  2264. }
  2265. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2266. _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
  2267. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  2268. (__v2df) __W,
  2269. (__mmask8) __U);
  2270. }
  2271. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2272. _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
  2273. return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  2274. (__v2df)
  2275. _mm_setzero_pd (),
  2276. (__mmask8) __U);
  2277. }
  2278. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2279. _mm256_getexp_pd (__m256d __A) {
  2280. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  2281. (__v4df)
  2282. _mm256_setzero_pd (),
  2283. (__mmask8) -1);
  2284. }
  2285. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2286. _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
  2287. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  2288. (__v4df) __W,
  2289. (__mmask8) __U);
  2290. }
  2291. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2292. _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
  2293. return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  2294. (__v4df)
  2295. _mm256_setzero_pd (),
  2296. (__mmask8) __U);
  2297. }
  2298. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2299. _mm_getexp_ps (__m128 __A) {
  2300. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  2301. (__v4sf)
  2302. _mm_setzero_ps (),
  2303. (__mmask8) -1);
  2304. }
  2305. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2306. _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
  2307. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  2308. (__v4sf) __W,
  2309. (__mmask8) __U);
  2310. }
  2311. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2312. _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
  2313. return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  2314. (__v4sf)
  2315. _mm_setzero_ps (),
  2316. (__mmask8) __U);
  2317. }
  2318. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2319. _mm256_getexp_ps (__m256 __A) {
  2320. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  2321. (__v8sf)
  2322. _mm256_setzero_ps (),
  2323. (__mmask8) -1);
  2324. }
  2325. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2326. _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
  2327. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  2328. (__v8sf) __W,
  2329. (__mmask8) __U);
  2330. }
  2331. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2332. _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
  2333. return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  2334. (__v8sf)
  2335. _mm256_setzero_ps (),
  2336. (__mmask8) __U);
  2337. }
  2338. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2339. _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  2340. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2341. (__v2df)_mm_max_pd(__A, __B),
  2342. (__v2df)__W);
  2343. }
  2344. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2345. _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  2346. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2347. (__v2df)_mm_max_pd(__A, __B),
  2348. (__v2df)_mm_setzero_pd());
  2349. }
  2350. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2351. _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  2352. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2353. (__v4df)_mm256_max_pd(__A, __B),
  2354. (__v4df)__W);
  2355. }
  2356. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2357. _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  2358. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2359. (__v4df)_mm256_max_pd(__A, __B),
  2360. (__v4df)_mm256_setzero_pd());
  2361. }
  2362. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2363. _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  2364. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2365. (__v4sf)_mm_max_ps(__A, __B),
  2366. (__v4sf)__W);
  2367. }
  2368. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2369. _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  2370. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2371. (__v4sf)_mm_max_ps(__A, __B),
  2372. (__v4sf)_mm_setzero_ps());
  2373. }
  2374. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2375. _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  2376. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2377. (__v8sf)_mm256_max_ps(__A, __B),
  2378. (__v8sf)__W);
  2379. }
  2380. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2381. _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  2382. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2383. (__v8sf)_mm256_max_ps(__A, __B),
  2384. (__v8sf)_mm256_setzero_ps());
  2385. }
  2386. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2387. _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  2388. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2389. (__v2df)_mm_min_pd(__A, __B),
  2390. (__v2df)__W);
  2391. }
  2392. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2393. _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  2394. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2395. (__v2df)_mm_min_pd(__A, __B),
  2396. (__v2df)_mm_setzero_pd());
  2397. }
  2398. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2399. _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  2400. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2401. (__v4df)_mm256_min_pd(__A, __B),
  2402. (__v4df)__W);
  2403. }
  2404. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2405. _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  2406. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2407. (__v4df)_mm256_min_pd(__A, __B),
  2408. (__v4df)_mm256_setzero_pd());
  2409. }
  2410. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2411. _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  2412. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2413. (__v4sf)_mm_min_ps(__A, __B),
  2414. (__v4sf)__W);
  2415. }
  2416. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2417. _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  2418. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2419. (__v4sf)_mm_min_ps(__A, __B),
  2420. (__v4sf)_mm_setzero_ps());
  2421. }
  2422. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2423. _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  2424. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2425. (__v8sf)_mm256_min_ps(__A, __B),
  2426. (__v8sf)__W);
  2427. }
  2428. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2429. _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  2430. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2431. (__v8sf)_mm256_min_ps(__A, __B),
  2432. (__v8sf)_mm256_setzero_ps());
  2433. }
  2434. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2435. _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  2436. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2437. (__v2df)_mm_mul_pd(__A, __B),
  2438. (__v2df)__W);
  2439. }
  2440. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2441. _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  2442. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  2443. (__v2df)_mm_mul_pd(__A, __B),
  2444. (__v2df)_mm_setzero_pd());
  2445. }
  2446. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2447. _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  2448. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2449. (__v4df)_mm256_mul_pd(__A, __B),
  2450. (__v4df)__W);
  2451. }
  2452. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2453. _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  2454. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  2455. (__v4df)_mm256_mul_pd(__A, __B),
  2456. (__v4df)_mm256_setzero_pd());
  2457. }
  2458. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2459. _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  2460. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2461. (__v4sf)_mm_mul_ps(__A, __B),
  2462. (__v4sf)__W);
  2463. }
  2464. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2465. _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  2466. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  2467. (__v4sf)_mm_mul_ps(__A, __B),
  2468. (__v4sf)_mm_setzero_ps());
  2469. }
  2470. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2471. _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  2472. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2473. (__v8sf)_mm256_mul_ps(__A, __B),
  2474. (__v8sf)__W);
  2475. }
  2476. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2477. _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  2478. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  2479. (__v8sf)_mm256_mul_ps(__A, __B),
  2480. (__v8sf)_mm256_setzero_ps());
  2481. }
  2482. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2483. _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
  2484. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  2485. (__v4si)_mm_abs_epi32(__A),
  2486. (__v4si)__W);
  2487. }
  2488. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2489. _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
  2490. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  2491. (__v4si)_mm_abs_epi32(__A),
  2492. (__v4si)_mm_setzero_si128());
  2493. }
  2494. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2495. _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
  2496. return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
  2497. (__v8si)_mm256_abs_epi32(__A),
  2498. (__v8si)__W);
  2499. }
  2500. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2501. _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
  2502. return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
  2503. (__v8si)_mm256_abs_epi32(__A),
  2504. (__v8si)_mm256_setzero_si256());
  2505. }
  2506. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2507. _mm_abs_epi64 (__m128i __A) {
  2508. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  2509. (__v2di)
  2510. _mm_setzero_si128 (),
  2511. (__mmask8) -1);
  2512. }
  2513. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2514. _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
  2515. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  2516. (__v2di) __W,
  2517. (__mmask8) __U);
  2518. }
  2519. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2520. _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
  2521. return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  2522. (__v2di)
  2523. _mm_setzero_si128 (),
  2524. (__mmask8) __U);
  2525. }
  2526. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2527. _mm256_abs_epi64 (__m256i __A) {
  2528. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  2529. (__v4di)
  2530. _mm256_setzero_si256 (),
  2531. (__mmask8) -1);
  2532. }
  2533. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2534. _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
  2535. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  2536. (__v4di) __W,
  2537. (__mmask8) __U);
  2538. }
  2539. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2540. _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
  2541. return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  2542. (__v4di)
  2543. _mm256_setzero_si256 (),
  2544. (__mmask8) __U);
  2545. }
  2546. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2547. _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
  2548. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2549. (__v4si)_mm_max_epi32(__A, __B),
  2550. (__v4si)_mm_setzero_si128());
  2551. }
  2552. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2553. _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
  2554. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2555. (__v4si)_mm_max_epi32(__A, __B),
  2556. (__v4si)__W);
  2557. }
  2558. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2559. _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
  2560. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2561. (__v8si)_mm256_max_epi32(__A, __B),
  2562. (__v8si)_mm256_setzero_si256());
  2563. }
  2564. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2565. _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
  2566. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2567. (__v8si)_mm256_max_epi32(__A, __B),
  2568. (__v8si)__W);
  2569. }
  2570. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2571. _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
  2572. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  2573. (__v2di) __B,
  2574. (__v2di)
  2575. _mm_setzero_si128 (),
  2576. __M);
  2577. }
  2578. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2579. _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  2580. __m128i __B) {
  2581. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  2582. (__v2di) __B,
  2583. (__v2di) __W, __M);
  2584. }
  2585. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2586. _mm_max_epi64 (__m128i __A, __m128i __B) {
  2587. return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  2588. (__v2di) __B,
  2589. (__v2di)
  2590. _mm_setzero_si128 (),
  2591. (__mmask8) -1);
  2592. }
  2593. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2594. _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
  2595. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  2596. (__v4di) __B,
  2597. (__v4di)
  2598. _mm256_setzero_si256 (),
  2599. __M);
  2600. }
  2601. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2602. _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  2603. __m256i __B) {
  2604. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  2605. (__v4di) __B,
  2606. (__v4di) __W, __M);
  2607. }
  2608. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2609. _mm256_max_epi64 (__m256i __A, __m256i __B) {
  2610. return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  2611. (__v4di) __B,
  2612. (__v4di)
  2613. _mm256_setzero_si256 (),
  2614. (__mmask8) -1);
  2615. }
  2616. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2617. _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
  2618. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2619. (__v4si)_mm_max_epu32(__A, __B),
  2620. (__v4si)_mm_setzero_si128());
  2621. }
  2622. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2623. _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
  2624. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2625. (__v4si)_mm_max_epu32(__A, __B),
  2626. (__v4si)__W);
  2627. }
  2628. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2629. _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
  2630. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2631. (__v8si)_mm256_max_epu32(__A, __B),
  2632. (__v8si)_mm256_setzero_si256());
  2633. }
  2634. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2635. _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
  2636. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2637. (__v8si)_mm256_max_epu32(__A, __B),
  2638. (__v8si)__W);
  2639. }
  2640. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2641. _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
  2642. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  2643. (__v2di) __B,
  2644. (__v2di)
  2645. _mm_setzero_si128 (),
  2646. __M);
  2647. }
  2648. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2649. _mm_max_epu64 (__m128i __A, __m128i __B) {
  2650. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  2651. (__v2di) __B,
  2652. (__v2di)
  2653. _mm_setzero_si128 (),
  2654. (__mmask8) -1);
  2655. }
  2656. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2657. _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  2658. __m128i __B) {
  2659. return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  2660. (__v2di) __B,
  2661. (__v2di) __W, __M);
  2662. }
  2663. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2664. _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
  2665. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  2666. (__v4di) __B,
  2667. (__v4di)
  2668. _mm256_setzero_si256 (),
  2669. __M);
  2670. }
  2671. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2672. _mm256_max_epu64 (__m256i __A, __m256i __B) {
  2673. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  2674. (__v4di) __B,
  2675. (__v4di)
  2676. _mm256_setzero_si256 (),
  2677. (__mmask8) -1);
  2678. }
  2679. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2680. _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  2681. __m256i __B) {
  2682. return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  2683. (__v4di) __B,
  2684. (__v4di) __W, __M);
  2685. }
  2686. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2687. _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
  2688. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2689. (__v4si)_mm_min_epi32(__A, __B),
  2690. (__v4si)_mm_setzero_si128());
  2691. }
  2692. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2693. _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
  2694. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2695. (__v4si)_mm_min_epi32(__A, __B),
  2696. (__v4si)__W);
  2697. }
  2698. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2699. _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
  2700. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2701. (__v8si)_mm256_min_epi32(__A, __B),
  2702. (__v8si)_mm256_setzero_si256());
  2703. }
  2704. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2705. _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
  2706. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2707. (__v8si)_mm256_min_epi32(__A, __B),
  2708. (__v8si)__W);
  2709. }
  2710. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2711. _mm_min_epi64 (__m128i __A, __m128i __B) {
  2712. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  2713. (__v2di) __B,
  2714. (__v2di)
  2715. _mm_setzero_si128 (),
  2716. (__mmask8) -1);
  2717. }
  2718. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2719. _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
  2720. __m128i __B) {
  2721. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  2722. (__v2di) __B,
  2723. (__v2di) __W, __M);
  2724. }
  2725. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2726. _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
  2727. return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  2728. (__v2di) __B,
  2729. (__v2di)
  2730. _mm_setzero_si128 (),
  2731. __M);
  2732. }
  2733. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2734. _mm256_min_epi64 (__m256i __A, __m256i __B) {
  2735. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  2736. (__v4di) __B,
  2737. (__v4di)
  2738. _mm256_setzero_si256 (),
  2739. (__mmask8) -1);
  2740. }
  2741. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2742. _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
  2743. __m256i __B) {
  2744. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  2745. (__v4di) __B,
  2746. (__v4di) __W, __M);
  2747. }
  2748. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2749. _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
  2750. return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  2751. (__v4di) __B,
  2752. (__v4di)
  2753. _mm256_setzero_si256 (),
  2754. __M);
  2755. }
  2756. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2757. _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
  2758. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2759. (__v4si)_mm_min_epu32(__A, __B),
  2760. (__v4si)_mm_setzero_si128());
  2761. }
  2762. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2763. _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
  2764. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  2765. (__v4si)_mm_min_epu32(__A, __B),
  2766. (__v4si)__W);
  2767. }
  2768. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2769. _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
  2770. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2771. (__v8si)_mm256_min_epu32(__A, __B),
  2772. (__v8si)_mm256_setzero_si256());
  2773. }
  2774. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2775. _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
  2776. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  2777. (__v8si)_mm256_min_epu32(__A, __B),
  2778. (__v8si)__W);
  2779. }
  2780. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2781. _mm_min_epu64 (__m128i __A, __m128i __B) {
  2782. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  2783. (__v2di) __B,
  2784. (__v2di)
  2785. _mm_setzero_si128 (),
  2786. (__mmask8) -1);
  2787. }
  2788. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2789. _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
  2790. __m128i __B) {
  2791. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  2792. (__v2di) __B,
  2793. (__v2di) __W, __M);
  2794. }
  2795. static __inline__ __m128i __DEFAULT_FN_ATTRS
  2796. _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
  2797. return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  2798. (__v2di) __B,
  2799. (__v2di)
  2800. _mm_setzero_si128 (),
  2801. __M);
  2802. }
  2803. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2804. _mm256_min_epu64 (__m256i __A, __m256i __B) {
  2805. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  2806. (__v4di) __B,
  2807. (__v4di)
  2808. _mm256_setzero_si256 (),
  2809. (__mmask8) -1);
  2810. }
  2811. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2812. _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
  2813. __m256i __B) {
  2814. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  2815. (__v4di) __B,
  2816. (__v4di) __W, __M);
  2817. }
  2818. static __inline__ __m256i __DEFAULT_FN_ATTRS
  2819. _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
  2820. return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  2821. (__v4di) __B,
  2822. (__v4di)
  2823. _mm256_setzero_si256 (),
  2824. __M);
  2825. }
  /* Expands to __builtin_ia32_rndscalepd_128_mask on A with imm converted to
     int, a zero vector and an all-ones mask, (__mmask8)-1. */
  #define _mm_roundscale_pd(A, imm) \
    ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1))
  /* Expands to __builtin_ia32_rndscalepd_128_mask on A with imm converted to
     int, the vector W and the mask U. */
  #define _mm_mask_roundscale_pd(W, U, A, imm) \
    ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), (__v2df)(__m128d)(W), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscalepd_128_mask on A with imm converted to
     int, a zero vector and the mask U. */
  #define _mm_maskz_roundscale_pd(U, A, imm) \
    ((__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscalepd_256_mask on A with imm converted to
     int, a zero vector and an all-ones mask, (__mmask8)-1. */
  #define _mm256_roundscale_pd(A, imm) \
    ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
        (__mmask8)-1))
  /* Expands to __builtin_ia32_rndscalepd_256_mask on A with imm converted to
     int, the vector W and the mask U. */
  #define _mm256_mask_roundscale_pd(W, U, A, imm) \
    ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), (__v4df)(__m256d)(W), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscalepd_256_mask on A with imm converted to
     int, a zero vector and the mask U. */
  #define _mm256_maskz_roundscale_pd(U, A, imm) \
    ((__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_setzero_pd(), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscaleps_128_mask on A with imm converted to
     int, a zero vector and an all-ones mask, (__mmask8)-1. */
  #define _mm_roundscale_ps(A, imm) \
    ((__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1))
  /* Expands to __builtin_ia32_rndscaleps_128_mask on A with imm converted to
     int, the vector W and the mask U. */
  #define _mm_mask_roundscale_ps(W, U, A, imm) \
    ((__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), (__v4sf)(__m128)(W), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscaleps_128_mask on A with imm converted to
     int, a zero vector and the mask U. */
  #define _mm_maskz_roundscale_ps(U, A, imm) \
    ((__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscaleps_256_mask on A with imm converted to
     int, a zero vector and an all-ones mask, (__mmask8)-1. */
  #define _mm256_roundscale_ps(A, imm) \
    ((__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1))
  /* Expands to __builtin_ia32_rndscaleps_256_mask on A with imm converted to
     int, the vector W and the mask U. */
  #define _mm256_mask_roundscale_ps(W, U, A, imm) \
    ((__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), (__v8sf)(__m256)(W), \
        (__mmask8)(U)))
  /* Expands to __builtin_ia32_rndscaleps_256_mask on A with imm converted to
     int, a zero vector and the mask U. */
  #define _mm256_maskz_roundscale_ps(U, A, imm) \
    ((__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U)))
  2880. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2881. _mm_scalef_pd (__m128d __A, __m128d __B) {
  2882. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  2883. (__v2df) __B,
  2884. (__v2df)
  2885. _mm_setzero_pd (),
  2886. (__mmask8) -1);
  2887. }
  2888. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2889. _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
  2890. __m128d __B) {
  2891. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  2892. (__v2df) __B,
  2893. (__v2df) __W,
  2894. (__mmask8) __U);
  2895. }
  2896. static __inline__ __m128d __DEFAULT_FN_ATTRS
  2897. _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  2898. return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  2899. (__v2df) __B,
  2900. (__v2df)
  2901. _mm_setzero_pd (),
  2902. (__mmask8) __U);
  2903. }
  2904. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2905. _mm256_scalef_pd (__m256d __A, __m256d __B) {
  2906. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  2907. (__v4df) __B,
  2908. (__v4df)
  2909. _mm256_setzero_pd (),
  2910. (__mmask8) -1);
  2911. }
  2912. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2913. _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
  2914. __m256d __B) {
  2915. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  2916. (__v4df) __B,
  2917. (__v4df) __W,
  2918. (__mmask8) __U);
  2919. }
  2920. static __inline__ __m256d __DEFAULT_FN_ATTRS
  2921. _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  2922. return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  2923. (__v4df) __B,
  2924. (__v4df)
  2925. _mm256_setzero_pd (),
  2926. (__mmask8) __U);
  2927. }
  2928. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2929. _mm_scalef_ps (__m128 __A, __m128 __B) {
  2930. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  2931. (__v4sf) __B,
  2932. (__v4sf)
  2933. _mm_setzero_ps (),
  2934. (__mmask8) -1);
  2935. }
  2936. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2937. _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  2938. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  2939. (__v4sf) __B,
  2940. (__v4sf) __W,
  2941. (__mmask8) __U);
  2942. }
  2943. static __inline__ __m128 __DEFAULT_FN_ATTRS
  2944. _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  2945. return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  2946. (__v4sf) __B,
  2947. (__v4sf)
  2948. _mm_setzero_ps (),
  2949. (__mmask8) __U);
  2950. }
  2951. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2952. _mm256_scalef_ps (__m256 __A, __m256 __B) {
  2953. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  2954. (__v8sf) __B,
  2955. (__v8sf)
  2956. _mm256_setzero_ps (),
  2957. (__mmask8) -1);
  2958. }
  2959. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2960. _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
  2961. __m256 __B) {
  2962. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  2963. (__v8sf) __B,
  2964. (__v8sf) __W,
  2965. (__mmask8) __U);
  2966. }
  2967. static __inline__ __m256 __DEFAULT_FN_ATTRS
  2968. _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  2969. return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  2970. (__v8sf) __B,
  2971. (__v8sf)
  2972. _mm256_setzero_ps (),
  2973. (__mmask8) __U);
  2974. }
  /* Expands to __builtin_ia32_scatterdiv2df with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i64scatter_pd(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
                                  (__v2di)(__m128i)(index), \
                                  (__v2df)(__m128d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv2df with mask converted to
     __mmask8. */
  #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
                                  (__v2di)(__m128i)(index), \
                                  (__v2df)(__m128d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv2di with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i64scatter_epi64(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
                                  (__v2di)(__m128i)(index), \
                                  (__v2di)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv2di with mask converted to
     __mmask8. */
  #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
                                  (__v2di)(__m128i)(index), \
                                  (__v2di)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4df with an all-ones mask,
     (__mmask8)-1. */
  #define _mm256_i64scatter_pd(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
                                  (__v4di)(__m256i)(index), \
                                  (__v4df)(__m256d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4df with mask converted to
     __mmask8. */
  #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
                                  (__v4di)(__m256i)(index), \
                                  (__v4df)(__m256d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4di with an all-ones mask,
     (__mmask8)-1. */
  #define _mm256_i64scatter_epi64(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
                                  (__v4di)(__m256i)(index), \
                                  (__v4di)(__m256i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4di with mask converted to
     __mmask8. */
  #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
                                  (__v4di)(__m256i)(index), \
                                  (__v4di)(__m256i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4sf with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i64scatter_ps(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
                                  (__v2di)(__m128i)(index), \
                                  (__v4sf)(__m128)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4sf with mask converted to
     __mmask8. */
  #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
                                  (__v2di)(__m128i)(index), \
                                  (__v4sf)(__m128)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4si with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i64scatter_epi32(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
                                  (__v2di)(__m128i)(index), \
                                  (__v4si)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv4si with mask converted to
     __mmask8. */
  #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
                                  (__v2di)(__m128i)(index), \
                                  (__v4si)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv8sf with an all-ones mask,
     (__mmask8)-1.  index is converted as a __m256i, v1 as a __m128. */
  #define _mm256_i64scatter_ps(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
                                  (__v4di)(__m256i)(index), \
                                  (__v4sf)(__m128)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv8sf with mask converted to
     __mmask8.  index is converted as a __m256i, v1 as a __m128. */
  #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
                                  (__v4di)(__m256i)(index), \
                                  (__v4sf)(__m128)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv8si with an all-ones mask,
     (__mmask8)-1.  index is converted as a __m256i, v1 as a __m128i. */
  #define _mm256_i64scatter_epi32(addr, index, v1, scale) \
    (__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
                                  (__v4di)(__m256i)(index), \
                                  (__v4si)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scatterdiv8si with mask converted to
     __mmask8.  index is converted as a __m256i, v1 as a __m128i. */
  #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
    (__builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
                                  (__v4di)(__m256i)(index), \
                                  (__v4si)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv2df with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i32scatter_pd(addr, index, v1, scale) \
    (__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
                                  (__v4si)(__m128i)(index), \
                                  (__v2df)(__m128d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv2df with mask converted to
     __mmask8. */
  #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
    (__builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
                                  (__v4si)(__m128i)(index), \
                                  (__v2df)(__m128d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv2di with an all-ones mask,
     (__mmask8)-1. */
  #define _mm_i32scatter_epi64(addr, index, v1, scale) \
    (__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
                                  (__v4si)(__m128i)(index), \
                                  (__v2di)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv2di with mask converted to
     __mmask8. */
  #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
    (__builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
                                  (__v4si)(__m128i)(index), \
                                  (__v2di)(__m128i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv4df with an all-ones mask,
     (__mmask8)-1.  index is converted as a __m128i, v1 as a __m256d. */
  #define _mm256_i32scatter_pd(addr, index, v1, scale) \
    (__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
                                  (__v4si)(__m128i)(index), \
                                  (__v4df)(__m256d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv4df with mask converted to
     __mmask8.  index is converted as a __m128i, v1 as a __m256d. */
  #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
    (__builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
                                  (__v4si)(__m128i)(index), \
                                  (__v4df)(__m256d)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv4di with an all-ones mask,
     (__mmask8)-1.  index is converted as a __m128i, v1 as a __m256i. */
  #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
    (__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
                                  (__v4si)(__m128i)(index), \
                                  (__v4di)(__m256i)(v1), (int)(scale)))
  /* Expands to __builtin_ia32_scattersiv4di with mask converted to
     __mmask8.  index is converted as a __m128i, v1 as a __m256i. */
  #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
    (__builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
                                  (__v4si)(__m128i)(index), \
                                  (__v4di)(__m256i)(v1), (int)(scale)))
  3071. #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  3072. __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
  3073. (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
  3074. (int)(scale)); })
  3075. #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  3076. __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
  3077. (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
  3078. (int)(scale)); })
  3079. #define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  3080. __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
  3081. (__v4si)(__m128i)(index), \
  3082. (__v4si)(__m128i)(v1), (int)(scale)); })
  3083. #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  3084. __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
  3085. (__v4si)(__m128i)(index), \
  3086. (__v4si)(__m128i)(v1), (int)(scale)); })
  3087. #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  3088. __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
  3089. (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
  3090. (int)(scale)); })
  3091. #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  3092. __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
  3093. (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
  3094. (int)(scale)); })
  3095. #define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  3096. __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
  3097. (__v8si)(__m256i)(index), \
  3098. (__v8si)(__m256i)(v1), (int)(scale)); })
  3099. #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  3100. __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
  3101. (__v8si)(__m256i)(index), \
  3102. (__v8si)(__m256i)(v1), (int)(scale)); })
  3103. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3104. _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
  3105. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  3106. (__v2df)_mm_sqrt_pd(__A),
  3107. (__v2df)__W);
  3108. }
  3109. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3110. _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
  3111. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  3112. (__v2df)_mm_sqrt_pd(__A),
  3113. (__v2df)_mm_setzero_pd());
  3114. }
  3115. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3116. _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
  3117. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  3118. (__v4df)_mm256_sqrt_pd(__A),
  3119. (__v4df)__W);
  3120. }
  3121. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3122. _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
  3123. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  3124. (__v4df)_mm256_sqrt_pd(__A),
  3125. (__v4df)_mm256_setzero_pd());
  3126. }
  3127. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3128. _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
  3129. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  3130. (__v4sf)_mm_sqrt_ps(__A),
  3131. (__v4sf)__W);
  3132. }
  3133. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3134. _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
  3135. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  3136. (__v4sf)_mm_sqrt_ps(__A),
  3137. (__v4sf)_mm_setzero_pd());
  3138. }
  3139. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3140. _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
  3141. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  3142. (__v8sf)_mm256_sqrt_ps(__A),
  3143. (__v8sf)__W);
  3144. }
  3145. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3146. _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
  3147. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  3148. (__v8sf)_mm256_sqrt_ps(__A),
  3149. (__v8sf)_mm256_setzero_ps());
  3150. }
  3151. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3152. _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  3153. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  3154. (__v2df)_mm_sub_pd(__A, __B),
  3155. (__v2df)__W);
  3156. }
  3157. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3158. _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  3159. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  3160. (__v2df)_mm_sub_pd(__A, __B),
  3161. (__v2df)_mm_setzero_pd());
  3162. }
  3163. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3164. _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  3165. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  3166. (__v4df)_mm256_sub_pd(__A, __B),
  3167. (__v4df)__W);
  3168. }
  3169. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3170. _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  3171. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  3172. (__v4df)_mm256_sub_pd(__A, __B),
  3173. (__v4df)_mm256_setzero_pd());
  3174. }
  3175. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3176. _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  3177. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  3178. (__v4sf)_mm_sub_ps(__A, __B),
  3179. (__v4sf)__W);
  3180. }
  3181. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3182. _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  3183. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  3184. (__v4sf)_mm_sub_ps(__A, __B),
  3185. (__v4sf)_mm_setzero_ps());
  3186. }
  3187. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3188. _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  3189. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  3190. (__v8sf)_mm256_sub_ps(__A, __B),
  3191. (__v8sf)__W);
  3192. }
  3193. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3194. _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  3195. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  3196. (__v8sf)_mm256_sub_ps(__A, __B),
  3197. (__v8sf)_mm256_setzero_ps());
  3198. }
  3199. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3200. _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
  3201. __m128i __B) {
  3202. return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
  3203. (__v4si) __I
  3204. /* idx */ ,
  3205. (__v4si) __B,
  3206. (__mmask8) __U);
  3207. }
  3208. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3209. _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
  3210. __mmask8 __U, __m256i __B) {
  3211. return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
  3212. (__v8si) __I
  3213. /* idx */ ,
  3214. (__v8si) __B,
  3215. (__mmask8) __U);
  3216. }
  3217. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3218. _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
  3219. __m128d __B) {
  3220. return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
  3221. (__v2di) __I
  3222. /* idx */ ,
  3223. (__v2df) __B,
  3224. (__mmask8)
  3225. __U);
  3226. }
  3227. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3228. _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
  3229. __m256d __B) {
  3230. return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
  3231. (__v4di) __I
  3232. /* idx */ ,
  3233. (__v4df) __B,
  3234. (__mmask8)
  3235. __U);
  3236. }
  3237. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3238. _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
  3239. __m128 __B) {
  3240. return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
  3241. (__v4si) __I
  3242. /* idx */ ,
  3243. (__v4sf) __B,
  3244. (__mmask8) __U);
  3245. }
  3246. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3247. _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
  3248. __m256 __B) {
  3249. return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
  3250. (__v8si) __I
  3251. /* idx */ ,
  3252. (__v8sf) __B,
  3253. (__mmask8) __U);
  3254. }
  3255. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3256. _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
  3257. __m128i __B) {
  3258. return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
  3259. (__v2di) __I
  3260. /* idx */ ,
  3261. (__v2di) __B,
  3262. (__mmask8) __U);
  3263. }
  3264. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3265. _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
  3266. __mmask8 __U, __m256i __B) {
  3267. return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
  3268. (__v4di) __I
  3269. /* idx */ ,
  3270. (__v4di) __B,
  3271. (__mmask8) __U);
  3272. }
  3273. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3274. _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
  3275. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  3276. /* idx */ ,
  3277. (__v4si) __A,
  3278. (__v4si) __B,
  3279. (__mmask8) -1);
  3280. }
  3281. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3282. _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
  3283. __m128i __B) {
  3284. return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  3285. /* idx */ ,
  3286. (__v4si) __A,
  3287. (__v4si) __B,
  3288. (__mmask8) __U);
  3289. }
  3290. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3291. _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
  3292. __m128i __B) {
  3293. return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
  3294. /* idx */ ,
  3295. (__v4si) __A,
  3296. (__v4si) __B,
  3297. (__mmask8)
  3298. __U);
  3299. }
  3300. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3301. _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
  3302. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  3303. /* idx */ ,
  3304. (__v8si) __A,
  3305. (__v8si) __B,
  3306. (__mmask8) -1);
  3307. }
  3308. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3309. _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
  3310. __m256i __B) {
  3311. return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  3312. /* idx */ ,
  3313. (__v8si) __A,
  3314. (__v8si) __B,
  3315. (__mmask8) __U);
  3316. }
  3317. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3318. _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
  3319. __m256i __I, __m256i __B) {
  3320. return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
  3321. /* idx */ ,
  3322. (__v8si) __A,
  3323. (__v8si) __B,
  3324. (__mmask8)
  3325. __U);
  3326. }
  3327. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3328. _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
  3329. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  3330. /* idx */ ,
  3331. (__v2df) __A,
  3332. (__v2df) __B,
  3333. (__mmask8) -
  3334. 1);
  3335. }
  3336. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3337. _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
  3338. __m128d __B) {
  3339. return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  3340. /* idx */ ,
  3341. (__v2df) __A,
  3342. (__v2df) __B,
  3343. (__mmask8)
  3344. __U);
  3345. }
  3346. static __inline__ __m128d __DEFAULT_FN_ATTRS
  3347. _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
  3348. __m128d __B) {
  3349. return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
  3350. /* idx */ ,
  3351. (__v2df) __A,
  3352. (__v2df) __B,
  3353. (__mmask8)
  3354. __U);
  3355. }
  3356. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3357. _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
  3358. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  3359. /* idx */ ,
  3360. (__v4df) __A,
  3361. (__v4df) __B,
  3362. (__mmask8) -
  3363. 1);
  3364. }
  3365. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3366. _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
  3367. __m256d __B) {
  3368. return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  3369. /* idx */ ,
  3370. (__v4df) __A,
  3371. (__v4df) __B,
  3372. (__mmask8)
  3373. __U);
  3374. }
  3375. static __inline__ __m256d __DEFAULT_FN_ATTRS
  3376. _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
  3377. __m256d __B) {
  3378. return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
  3379. /* idx */ ,
  3380. (__v4df) __A,
  3381. (__v4df) __B,
  3382. (__mmask8)
  3383. __U);
  3384. }
  3385. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3386. _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
  3387. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  3388. /* idx */ ,
  3389. (__v4sf) __A,
  3390. (__v4sf) __B,
  3391. (__mmask8) -1);
  3392. }
  3393. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3394. _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
  3395. __m128 __B) {
  3396. return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  3397. /* idx */ ,
  3398. (__v4sf) __A,
  3399. (__v4sf) __B,
  3400. (__mmask8) __U);
  3401. }
  3402. static __inline__ __m128 __DEFAULT_FN_ATTRS
  3403. _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
  3404. __m128 __B) {
  3405. return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
  3406. /* idx */ ,
  3407. (__v4sf) __A,
  3408. (__v4sf) __B,
  3409. (__mmask8)
  3410. __U);
  3411. }
  3412. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3413. _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
  3414. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  3415. /* idx */ ,
  3416. (__v8sf) __A,
  3417. (__v8sf) __B,
  3418. (__mmask8) -1);
  3419. }
  3420. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3421. _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
  3422. __m256 __B) {
  3423. return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  3424. /* idx */ ,
  3425. (__v8sf) __A,
  3426. (__v8sf) __B,
  3427. (__mmask8) __U);
  3428. }
  3429. static __inline__ __m256 __DEFAULT_FN_ATTRS
  3430. _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
  3431. __m256 __B) {
  3432. return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
  3433. /* idx */ ,
  3434. (__v8sf) __A,
  3435. (__v8sf) __B,
  3436. (__mmask8)
  3437. __U);
  3438. }
  3439. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3440. _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
  3441. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  3442. /* idx */ ,
  3443. (__v2di) __A,
  3444. (__v2di) __B,
  3445. (__mmask8) -1);
  3446. }
  3447. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3448. _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
  3449. __m128i __B) {
  3450. return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  3451. /* idx */ ,
  3452. (__v2di) __A,
  3453. (__v2di) __B,
  3454. (__mmask8) __U);
  3455. }
  3456. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3457. _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
  3458. __m128i __B) {
  3459. return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
  3460. /* idx */ ,
  3461. (__v2di) __A,
  3462. (__v2di) __B,
  3463. (__mmask8)
  3464. __U);
  3465. }
  3466. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3467. _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
  3468. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  3469. /* idx */ ,
  3470. (__v4di) __A,
  3471. (__v4di) __B,
  3472. (__mmask8) -1);
  3473. }
  3474. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3475. _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
  3476. __m256i __B) {
  3477. return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  3478. /* idx */ ,
  3479. (__v4di) __A,
  3480. (__v4di) __B,
  3481. (__mmask8) __U);
  3482. }
  3483. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3484. _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
  3485. __m256i __I, __m256i __B) {
  3486. return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
  3487. /* idx */ ,
  3488. (__v4di) __A,
  3489. (__v4di) __B,
  3490. (__mmask8)
  3491. __U);
  3492. }
  3493. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3494. _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
  3495. {
  3496. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3497. (__v4si)_mm_cvtepi8_epi32(__A),
  3498. (__v4si)__W);
  3499. }
  3500. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3501. _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
  3502. {
  3503. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3504. (__v4si)_mm_cvtepi8_epi32(__A),
  3505. (__v4si)_mm_setzero_si128());
  3506. }
  3507. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3508. _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
  3509. {
  3510. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3511. (__v8si)_mm256_cvtepi8_epi32(__A),
  3512. (__v8si)__W);
  3513. }
  3514. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3515. _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
  3516. {
  3517. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3518. (__v8si)_mm256_cvtepi8_epi32(__A),
  3519. (__v8si)_mm256_setzero_si256());
  3520. }
  3521. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3522. _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
  3523. {
  3524. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3525. (__v2di)_mm_cvtepi8_epi64(__A),
  3526. (__v2di)__W);
  3527. }
  3528. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3529. _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
  3530. {
  3531. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3532. (__v2di)_mm_cvtepi8_epi64(__A),
  3533. (__v2di)_mm_setzero_si128());
  3534. }
  3535. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3536. _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
  3537. {
  3538. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3539. (__v4di)_mm256_cvtepi8_epi64(__A),
  3540. (__v4di)__W);
  3541. }
  3542. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3543. _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
  3544. {
  3545. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3546. (__v4di)_mm256_cvtepi8_epi64(__A),
  3547. (__v4di)_mm256_setzero_si256());
  3548. }
  3549. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3550. _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
  3551. {
  3552. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3553. (__v2di)_mm_cvtepi32_epi64(__X),
  3554. (__v2di)__W);
  3555. }
  3556. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3557. _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
  3558. {
  3559. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3560. (__v2di)_mm_cvtepi32_epi64(__X),
  3561. (__v2di)_mm_setzero_si128());
  3562. }
  3563. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3564. _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
  3565. {
  3566. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3567. (__v4di)_mm256_cvtepi32_epi64(__X),
  3568. (__v4di)__W);
  3569. }
  3570. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3571. _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
  3572. {
  3573. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3574. (__v4di)_mm256_cvtepi32_epi64(__X),
  3575. (__v4di)_mm256_setzero_si256());
  3576. }
  3577. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3578. _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
  3579. {
  3580. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3581. (__v4si)_mm_cvtepi16_epi32(__A),
  3582. (__v4si)__W);
  3583. }
  3584. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3585. _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
  3586. {
  3587. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3588. (__v4si)_mm_cvtepi16_epi32(__A),
  3589. (__v4si)_mm_setzero_si128());
  3590. }
  3591. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3592. _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
  3593. {
  3594. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3595. (__v8si)_mm256_cvtepi16_epi32(__A),
  3596. (__v8si)__W);
  3597. }
  3598. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3599. _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
  3600. {
  3601. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3602. (__v8si)_mm256_cvtepi16_epi32(__A),
  3603. (__v8si)_mm256_setzero_si256());
  3604. }
  3605. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3606. _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
  3607. {
  3608. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3609. (__v2di)_mm_cvtepi16_epi64(__A),
  3610. (__v2di)__W);
  3611. }
  3612. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3613. _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
  3614. {
  3615. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3616. (__v2di)_mm_cvtepi16_epi64(__A),
  3617. (__v2di)_mm_setzero_si128());
  3618. }
  3619. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3620. _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
  3621. {
  3622. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3623. (__v4di)_mm256_cvtepi16_epi64(__A),
  3624. (__v4di)__W);
  3625. }
  3626. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3627. _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
  3628. {
  3629. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3630. (__v4di)_mm256_cvtepi16_epi64(__A),
  3631. (__v4di)_mm256_setzero_si256());
  3632. }
  3633. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3634. _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
  3635. {
  3636. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3637. (__v4si)_mm_cvtepu8_epi32(__A),
  3638. (__v4si)__W);
  3639. }
  3640. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3641. _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
  3642. {
  3643. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3644. (__v4si)_mm_cvtepu8_epi32(__A),
  3645. (__v4si)_mm_setzero_si128());
  3646. }
  3647. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3648. _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
  3649. {
  3650. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3651. (__v8si)_mm256_cvtepu8_epi32(__A),
  3652. (__v8si)__W);
  3653. }
  3654. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3655. _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
  3656. {
  3657. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3658. (__v8si)_mm256_cvtepu8_epi32(__A),
  3659. (__v8si)_mm256_setzero_si256());
  3660. }
  3661. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3662. _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
  3663. {
  3664. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3665. (__v2di)_mm_cvtepu8_epi64(__A),
  3666. (__v2di)__W);
  3667. }
  3668. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3669. _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
  3670. {
  3671. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3672. (__v2di)_mm_cvtepu8_epi64(__A),
  3673. (__v2di)_mm_setzero_si128());
  3674. }
  3675. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3676. _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
  3677. {
  3678. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3679. (__v4di)_mm256_cvtepu8_epi64(__A),
  3680. (__v4di)__W);
  3681. }
  3682. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3683. _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
  3684. {
  3685. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3686. (__v4di)_mm256_cvtepu8_epi64(__A),
  3687. (__v4di)_mm256_setzero_si256());
  3688. }
  3689. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3690. _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
  3691. {
  3692. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3693. (__v2di)_mm_cvtepu32_epi64(__X),
  3694. (__v2di)__W);
  3695. }
  3696. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3697. _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
  3698. {
  3699. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3700. (__v2di)_mm_cvtepu32_epi64(__X),
  3701. (__v2di)_mm_setzero_si128());
  3702. }
  3703. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3704. _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
  3705. {
  3706. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3707. (__v4di)_mm256_cvtepu32_epi64(__X),
  3708. (__v4di)__W);
  3709. }
  3710. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3711. _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
  3712. {
  3713. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3714. (__v4di)_mm256_cvtepu32_epi64(__X),
  3715. (__v4di)_mm256_setzero_si256());
  3716. }
  3717. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3718. _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
  3719. {
  3720. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3721. (__v4si)_mm_cvtepu16_epi32(__A),
  3722. (__v4si)__W);
  3723. }
  3724. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3725. _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
  3726. {
  3727. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3728. (__v4si)_mm_cvtepu16_epi32(__A),
  3729. (__v4si)_mm_setzero_si128());
  3730. }
  3731. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3732. _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
  3733. {
  3734. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3735. (__v8si)_mm256_cvtepu16_epi32(__A),
  3736. (__v8si)__W);
  3737. }
  3738. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3739. _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
  3740. {
  3741. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3742. (__v8si)_mm256_cvtepu16_epi32(__A),
  3743. (__v8si)_mm256_setzero_si256());
  3744. }
  3745. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3746. _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
  3747. {
  3748. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3749. (__v2di)_mm_cvtepu16_epi64(__A),
  3750. (__v2di)__W);
  3751. }
  3752. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3753. _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
  3754. {
  3755. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  3756. (__v2di)_mm_cvtepu16_epi64(__A),
  3757. (__v2di)_mm_setzero_si128());
  3758. }
  3759. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3760. _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
  3761. {
  3762. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3763. (__v4di)_mm256_cvtepu16_epi64(__A),
  3764. (__v4di)__W);
  3765. }
  3766. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3767. _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
  3768. {
  3769. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  3770. (__v4di)_mm256_cvtepu16_epi64(__A),
  3771. (__v4di)_mm256_setzero_si256());
  3772. }
  3773. #define _mm_rol_epi32(a, b) __extension__ ({\
  3774. (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
  3775. (__v4si)_mm_setzero_si128(), \
  3776. (__mmask8)-1); })
  3777. #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
  3778. (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
  3779. (__v4si)(__m128i)(w), (__mmask8)(u)); })
  3780. #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
  3781. (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
  3782. (__v4si)_mm_setzero_si128(), \
  3783. (__mmask8)(u)); })
  3784. #define _mm256_rol_epi32(a, b) __extension__ ({\
  3785. (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
  3786. (__v8si)_mm256_setzero_si256(), \
  3787. (__mmask8)-1); })
  3788. #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
  3789. (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
  3790. (__v8si)(__m256i)(w), (__mmask8)(u)); })
  3791. #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
  3792. (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
  3793. (__v8si)_mm256_setzero_si256(), \
  3794. (__mmask8)(u)); })
  3795. #define _mm_rol_epi64(a, b) __extension__ ({\
  3796. (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
  3797. (__v2di)_mm_setzero_di(), \
  3798. (__mmask8)-1); })
  3799. #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
  3800. (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
  3801. (__v2di)(__m128i)(w), (__mmask8)(u)); })
  3802. #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
  3803. (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
  3804. (__v2di)_mm_setzero_di(), \
  3805. (__mmask8)(u)); })
  3806. #define _mm256_rol_epi64(a, b) __extension__ ({\
  3807. (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
  3808. (__v4di)_mm256_setzero_si256(), \
  3809. (__mmask8)-1); })
  3810. #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
  3811. (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
  3812. (__v4di)(__m256i)(w), (__mmask8)(u)); })
  3813. #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
  3814. (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
  3815. (__v4di)_mm256_setzero_si256(), \
  3816. (__mmask8)(u)); })
  3817. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3818. _mm_rolv_epi32 (__m128i __A, __m128i __B)
  3819. {
  3820. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  3821. (__v4si) __B,
  3822. (__v4si)
  3823. _mm_setzero_si128 (),
  3824. (__mmask8) -1);
  3825. }
  3826. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3827. _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  3828. __m128i __B)
  3829. {
  3830. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  3831. (__v4si) __B,
  3832. (__v4si) __W,
  3833. (__mmask8) __U);
  3834. }
  3835. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3836. _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  3837. {
  3838. return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  3839. (__v4si) __B,
  3840. (__v4si)
  3841. _mm_setzero_si128 (),
  3842. (__mmask8) __U);
  3843. }
  3844. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3845. _mm256_rolv_epi32 (__m256i __A, __m256i __B)
  3846. {
  3847. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  3848. (__v8si) __B,
  3849. (__v8si)
  3850. _mm256_setzero_si256 (),
  3851. (__mmask8) -1);
  3852. }
  3853. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3854. _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  3855. __m256i __B)
  3856. {
  3857. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  3858. (__v8si) __B,
  3859. (__v8si) __W,
  3860. (__mmask8) __U);
  3861. }
  3862. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3863. _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  3864. {
  3865. return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  3866. (__v8si) __B,
  3867. (__v8si)
  3868. _mm256_setzero_si256 (),
  3869. (__mmask8) __U);
  3870. }
  3871. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3872. _mm_rolv_epi64 (__m128i __A, __m128i __B)
  3873. {
  3874. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  3875. (__v2di) __B,
  3876. (__v2di)
  3877. _mm_setzero_di (),
  3878. (__mmask8) -1);
  3879. }
  3880. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3881. _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  3882. __m128i __B)
  3883. {
  3884. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  3885. (__v2di) __B,
  3886. (__v2di) __W,
  3887. (__mmask8) __U);
  3888. }
  3889. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3890. _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  3891. {
  3892. return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  3893. (__v2di) __B,
  3894. (__v2di)
  3895. _mm_setzero_di (),
  3896. (__mmask8) __U);
  3897. }
  3898. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3899. _mm256_rolv_epi64 (__m256i __A, __m256i __B)
  3900. {
  3901. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  3902. (__v4di) __B,
  3903. (__v4di)
  3904. _mm256_setzero_si256 (),
  3905. (__mmask8) -1);
  3906. }
  3907. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3908. _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  3909. __m256i __B)
  3910. {
  3911. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  3912. (__v4di) __B,
  3913. (__v4di) __W,
  3914. (__mmask8) __U);
  3915. }
  3916. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3917. _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  3918. {
  3919. return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  3920. (__v4di) __B,
  3921. (__v4di)
  3922. _mm256_setzero_si256 (),
  3923. (__mmask8) __U);
  3924. }
  /* Expands to __builtin_ia32_prord128_mask on (A) and the int (B), with a
   * zero vector as the third operand and the mask (__mmask8)-1. */
  #define _mm_ror_epi32(A, B) \
    ((__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                           (__v4si)_mm_setzero_si128(), \
                                           (__mmask8)-1))
  /* Expands to __builtin_ia32_prord128_mask on (A) and the int (B), with (W)
   * as the third operand and (U) as the mask. */
  #define _mm_mask_ror_epi32(W, U, A, B) \
    ((__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                           (__v4si)(__m128i)(W), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prord128_mask on (A) and the int (B), with a
   * zero vector as the third operand and (U) as the mask. */
  #define _mm_maskz_ror_epi32(U, A, B) \
    ((__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                           (__v4si)_mm_setzero_si128(), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prord256_mask on (A) and the int (B), with a
   * zero vector as the third operand and the mask (__mmask8)-1. */
  #define _mm256_ror_epi32(A, B) \
    ((__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1))
  /* Expands to __builtin_ia32_prord256_mask on (A) and the int (B), with (W)
   * as the third operand and (U) as the mask. */
  #define _mm256_mask_ror_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prord256_mask on (A) and the int (B), with a
   * zero vector as the third operand and (U) as the mask. */
  #define _mm256_maskz_ror_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prorq128_mask on (A) and the int (B), with a
   * zero vector as the third operand and the mask (__mmask8)-1. */
  #define _mm_ror_epi64(A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                           (__v2di)_mm_setzero_di(), \
                                           (__mmask8)-1))
  /* Expands to __builtin_ia32_prorq128_mask on (A) and the int (B), with (W)
   * as the third operand and (U) as the mask. */
  #define _mm_mask_ror_epi64(W, U, A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                           (__v2di)(__m128i)(W), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prorq128_mask on (A) and the int (B), with a
   * zero vector as the third operand and (U) as the mask. */
  #define _mm_maskz_ror_epi64(U, A, B) \
    ((__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                           (__v2di)_mm_setzero_di(), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prorq256_mask on (A) and the int (B), with a
   * zero vector as the third operand and the mask (__mmask8)-1. */
  #define _mm256_ror_epi64(A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                           (__v4di)_mm256_setzero_si256(), \
                                           (__mmask8)-1))
  /* Expands to __builtin_ia32_prorq256_mask on (A) and the int (B), with (W)
   * as the third operand and (U) as the mask. */
  #define _mm256_mask_ror_epi64(W, U, A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                           (__v4di)(__m256i)(W), \
                                           (__mmask8)(U)))
  /* Expands to __builtin_ia32_prorq256_mask on (A) and the int (B), with a
   * zero vector as the third operand and (U) as the mask. */
  #define _mm256_maskz_ror_epi64(U, A, B) \
    ((__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                           (__v4di)_mm256_setzero_si256(), \
                                           (__mmask8)(U)))
  3969. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3970. _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  3971. {
  3972. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3973. (__v4si)_mm_sll_epi32(__A, __B),
  3974. (__v4si)__W);
  3975. }
  3976. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3977. _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  3978. {
  3979. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  3980. (__v4si)_mm_sll_epi32(__A, __B),
  3981. (__v4si)_mm_setzero_si128());
  3982. }
  3983. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3984. _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  3985. {
  3986. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3987. (__v8si)_mm256_sll_epi32(__A, __B),
  3988. (__v8si)__W);
  3989. }
  3990. static __inline__ __m256i __DEFAULT_FN_ATTRS
  3991. _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
  3992. {
  3993. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  3994. (__v8si)_mm256_sll_epi32(__A, __B),
  3995. (__v8si)_mm256_setzero_si256());
  3996. }
  3997. static __inline__ __m128i __DEFAULT_FN_ATTRS
  3998. _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
  3999. {
  4000. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4001. (__v4si)_mm_slli_epi32(__A, __B),
  4002. (__v4si)__W);
  4003. }
  4004. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4005. _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
  4006. {
  4007. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4008. (__v4si)_mm_slli_epi32(__A, __B),
  4009. (__v4si)_mm_setzero_si128());
  4010. }
  4011. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4012. _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
  4013. {
  4014. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4015. (__v8si)_mm256_slli_epi32(__A, __B),
  4016. (__v8si)__W);
  4017. }
  4018. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4019. _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
  4020. {
  4021. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4022. (__v8si)_mm256_slli_epi32(__A, __B),
  4023. (__v8si)_mm256_setzero_si256());
  4024. }
  4025. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4026. _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  4027. {
  4028. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4029. (__v2di)_mm_sll_epi64(__A, __B),
  4030. (__v2di)__W);
  4031. }
  4032. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4033. _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  4034. {
  4035. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4036. (__v2di)_mm_sll_epi64(__A, __B),
  4037. (__v2di)_mm_setzero_di());
  4038. }
  4039. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4040. _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  4041. {
  4042. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4043. (__v4di)_mm256_sll_epi64(__A, __B),
  4044. (__v4di)__W);
  4045. }
  4046. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4047. _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
  4048. {
  4049. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4050. (__v4di)_mm256_sll_epi64(__A, __B),
  4051. (__v4di)_mm256_setzero_si256());
  4052. }
  4053. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4054. _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
  4055. {
  4056. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4057. (__v2di)_mm_slli_epi64(__A, __B),
  4058. (__v2di)__W);
  4059. }
  4060. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4061. _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
  4062. {
  4063. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4064. (__v2di)_mm_slli_epi64(__A, __B),
  4065. (__v2di)_mm_setzero_di());
  4066. }
  4067. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4068. _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
  4069. {
  4070. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4071. (__v4di)_mm256_slli_epi64(__A, __B),
  4072. (__v4di)__W);
  4073. }
  4074. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4075. _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
  4076. {
  4077. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4078. (__v4di)_mm256_slli_epi64(__A, __B),
  4079. (__v4di)_mm256_setzero_si256());
  4080. }
  4081. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4082. _mm_rorv_epi32 (__m128i __A, __m128i __B)
  4083. {
  4084. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  4085. (__v4si) __B,
  4086. (__v4si)
  4087. _mm_setzero_si128 (),
  4088. (__mmask8) -1);
  4089. }
  4090. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4091. _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
  4092. __m128i __B)
  4093. {
  4094. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  4095. (__v4si) __B,
  4096. (__v4si) __W,
  4097. (__mmask8) __U);
  4098. }
  4099. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4100. _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
  4101. {
  4102. return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
  4103. (__v4si) __B,
  4104. (__v4si)
  4105. _mm_setzero_si128 (),
  4106. (__mmask8) __U);
  4107. }
  4108. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4109. _mm256_rorv_epi32 (__m256i __A, __m256i __B)
  4110. {
  4111. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  4112. (__v8si) __B,
  4113. (__v8si)
  4114. _mm256_setzero_si256 (),
  4115. (__mmask8) -1);
  4116. }
  4117. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4118. _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
  4119. __m256i __B)
  4120. {
  4121. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  4122. (__v8si) __B,
  4123. (__v8si) __W,
  4124. (__mmask8) __U);
  4125. }
  4126. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4127. _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
  4128. {
  4129. return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
  4130. (__v8si) __B,
  4131. (__v8si)
  4132. _mm256_setzero_si256 (),
  4133. (__mmask8) __U);
  4134. }
  4135. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4136. _mm_rorv_epi64 (__m128i __A, __m128i __B)
  4137. {
  4138. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  4139. (__v2di) __B,
  4140. (__v2di)
  4141. _mm_setzero_di (),
  4142. (__mmask8) -1);
  4143. }
  4144. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4145. _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
  4146. __m128i __B)
  4147. {
  4148. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  4149. (__v2di) __B,
  4150. (__v2di) __W,
  4151. (__mmask8) __U);
  4152. }
  4153. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4154. _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
  4155. {
  4156. return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
  4157. (__v2di) __B,
  4158. (__v2di)
  4159. _mm_setzero_di (),
  4160. (__mmask8) __U);
  4161. }
  4162. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4163. _mm256_rorv_epi64 (__m256i __A, __m256i __B)
  4164. {
  4165. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  4166. (__v4di) __B,
  4167. (__v4di)
  4168. _mm256_setzero_si256 (),
  4169. (__mmask8) -1);
  4170. }
  4171. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4172. _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
  4173. __m256i __B)
  4174. {
  4175. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  4176. (__v4di) __B,
  4177. (__v4di) __W,
  4178. (__mmask8) __U);
  4179. }
  4180. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4181. _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
  4182. {
  4183. return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
  4184. (__v4di) __B,
  4185. (__v4di)
  4186. _mm256_setzero_si256 (),
  4187. (__mmask8) __U);
  4188. }
  4189. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4190. _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4191. {
  4192. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4193. (__v2di)_mm_sllv_epi64(__X, __Y),
  4194. (__v2di)__W);
  4195. }
  4196. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4197. _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
  4198. {
  4199. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4200. (__v2di)_mm_sllv_epi64(__X, __Y),
  4201. (__v2di)_mm_setzero_di());
  4202. }
  4203. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4204. _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4205. {
  4206. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4207. (__v4di)_mm256_sllv_epi64(__X, __Y),
  4208. (__v4di)__W);
  4209. }
  4210. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4211. _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
  4212. {
  4213. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4214. (__v4di)_mm256_sllv_epi64(__X, __Y),
  4215. (__v4di)_mm256_setzero_si256());
  4216. }
  4217. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4218. _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4219. {
  4220. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4221. (__v4si)_mm_sllv_epi32(__X, __Y),
  4222. (__v4si)__W);
  4223. }
  4224. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4225. _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
  4226. {
  4227. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4228. (__v4si)_mm_sllv_epi32(__X, __Y),
  4229. (__v4si)_mm_setzero_si128());
  4230. }
  4231. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4232. _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4233. {
  4234. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4235. (__v8si)_mm256_sllv_epi32(__X, __Y),
  4236. (__v8si)__W);
  4237. }
  4238. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4239. _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
  4240. {
  4241. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4242. (__v8si)_mm256_sllv_epi32(__X, __Y),
  4243. (__v8si)_mm256_setzero_si256());
  4244. }
  4245. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4246. _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4247. {
  4248. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4249. (__v2di)_mm_srlv_epi64(__X, __Y),
  4250. (__v2di)__W);
  4251. }
  4252. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4253. _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
  4254. {
  4255. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4256. (__v2di)_mm_srlv_epi64(__X, __Y),
  4257. (__v2di)_mm_setzero_di());
  4258. }
  4259. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4260. _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4261. {
  4262. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4263. (__v4di)_mm256_srlv_epi64(__X, __Y),
  4264. (__v4di)__W);
  4265. }
  4266. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4267. _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
  4268. {
  4269. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4270. (__v4di)_mm256_srlv_epi64(__X, __Y),
  4271. (__v4di)_mm256_setzero_si256());
  4272. }
  4273. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4274. _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4275. {
  4276. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4277. (__v4si)_mm_srlv_epi32(__X, __Y),
  4278. (__v4si)__W);
  4279. }
  4280. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4281. _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
  4282. {
  4283. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4284. (__v4si)_mm_srlv_epi32(__X, __Y),
  4285. (__v4si)_mm_setzero_si128());
  4286. }
  4287. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4288. _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4289. {
  4290. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4291. (__v8si)_mm256_srlv_epi32(__X, __Y),
  4292. (__v8si)__W);
  4293. }
  4294. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4295. _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
  4296. {
  4297. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4298. (__v8si)_mm256_srlv_epi32(__X, __Y),
  4299. (__v8si)_mm256_setzero_si256());
  4300. }
  4301. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4302. _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  4303. {
  4304. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4305. (__v4si)_mm_srl_epi32(__A, __B),
  4306. (__v4si)__W);
  4307. }
  4308. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4309. _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  4310. {
  4311. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4312. (__v4si)_mm_srl_epi32(__A, __B),
  4313. (__v4si)_mm_setzero_si128());
  4314. }
  4315. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4316. _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  4317. {
  4318. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4319. (__v8si)_mm256_srl_epi32(__A, __B),
  4320. (__v8si)__W);
  4321. }
  4322. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4323. _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
  4324. {
  4325. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4326. (__v8si)_mm256_srl_epi32(__A, __B),
  4327. (__v8si)_mm256_setzero_si256());
  4328. }
  4329. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4330. _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
  4331. {
  4332. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4333. (__v4si)_mm_srli_epi32(__A, __B),
  4334. (__v4si)__W);
  4335. }
  4336. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4337. _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
  4338. {
  4339. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4340. (__v4si)_mm_srli_epi32(__A, __B),
  4341. (__v4si)_mm_setzero_si128());
  4342. }
  4343. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4344. _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
  4345. {
  4346. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4347. (__v8si)_mm256_srli_epi32(__A, __B),
  4348. (__v8si)__W);
  4349. }
  4350. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4351. _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
  4352. {
  4353. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4354. (__v8si)_mm256_srli_epi32(__A, __B),
  4355. (__v8si)_mm256_setzero_si256());
  4356. }
  4357. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4358. _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  4359. {
  4360. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4361. (__v2di)_mm_srl_epi64(__A, __B),
  4362. (__v2di)__W);
  4363. }
  4364. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4365. _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  4366. {
  4367. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4368. (__v2di)_mm_srl_epi64(__A, __B),
  4369. (__v2di)_mm_setzero_di());
  4370. }
  4371. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4372. _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  4373. {
  4374. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4375. (__v4di)_mm256_srl_epi64(__A, __B),
  4376. (__v4di)__W);
  4377. }
  4378. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4379. _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
  4380. {
  4381. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4382. (__v4di)_mm256_srl_epi64(__A, __B),
  4383. (__v4di)_mm256_setzero_si256());
  4384. }
  4385. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4386. _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
  4387. {
  4388. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4389. (__v2di)_mm_srli_epi64(__A, __B),
  4390. (__v2di)__W);
  4391. }
  4392. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4393. _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
  4394. {
  4395. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4396. (__v2di)_mm_srli_epi64(__A, __B),
  4397. (__v2di)_mm_setzero_di());
  4398. }
  4399. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4400. _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
  4401. {
  4402. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4403. (__v4di)_mm256_srli_epi64(__A, __B),
  4404. (__v4di)__W);
  4405. }
  4406. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4407. _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
  4408. {
  4409. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4410. (__v4di)_mm256_srli_epi64(__A, __B),
  4411. (__v4di)_mm256_setzero_si256());
  4412. }
  4413. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4414. _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4415. {
  4416. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4417. (__v4si)_mm_srav_epi32(__X, __Y),
  4418. (__v4si)__W);
  4419. }
  4420. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4421. _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
  4422. {
  4423. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  4424. (__v4si)_mm_srav_epi32(__X, __Y),
  4425. (__v4si)_mm_setzero_si128());
  4426. }
  4427. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4428. _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4429. {
  4430. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4431. (__v8si)_mm256_srav_epi32(__X, __Y),
  4432. (__v8si)__W);
  4433. }
  4434. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4435. _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
  4436. {
  4437. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  4438. (__v8si)_mm256_srav_epi32(__X, __Y),
  4439. (__v8si)_mm256_setzero_si256());
  4440. }
  4441. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4442. _mm_srav_epi64(__m128i __X, __m128i __Y)
  4443. {
  4444. return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
  4445. }
  4446. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4447. _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
  4448. {
  4449. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4450. (__v2di)_mm_srav_epi64(__X, __Y),
  4451. (__v2di)__W);
  4452. }
  4453. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4454. _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
  4455. {
  4456. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  4457. (__v2di)_mm_srav_epi64(__X, __Y),
  4458. (__v2di)_mm_setzero_di());
  4459. }
  4460. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4461. _mm256_srav_epi64(__m256i __X, __m256i __Y)
  4462. {
  4463. return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
  4464. }
  4465. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4466. _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
  4467. {
  4468. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4469. (__v4di)_mm256_srav_epi64(__X, __Y),
  4470. (__v4di)__W);
  4471. }
  4472. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4473. _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
  4474. {
  4475. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  4476. (__v4di)_mm256_srav_epi64(__X, __Y),
  4477. (__v4di)_mm256_setzero_si256());
  4478. }
  4479. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4480. _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  4481. {
  4482. return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
  4483. (__v4si) __A,
  4484. (__v4si) __W);
  4485. }
  4486. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4487. _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
  4488. {
  4489. return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
  4490. (__v4si) __A,
  4491. (__v4si) _mm_setzero_si128 ());
  4492. }
  4493. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4494. _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  4495. {
  4496. return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
  4497. (__v8si) __A,
  4498. (__v8si) __W);
  4499. }
  4500. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4501. _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
  4502. {
  4503. return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
  4504. (__v8si) __A,
  4505. (__v8si) _mm256_setzero_si256 ());
  4506. }
  4507. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4508. _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  4509. {
  4510. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  4511. (__v4si) __W,
  4512. (__mmask8)
  4513. __U);
  4514. }
  4515. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4516. _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
  4517. {
  4518. return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
  4519. (__v4si)
  4520. _mm_setzero_si128 (),
  4521. (__mmask8)
  4522. __U);
  4523. }
  4524. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4525. _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  4526. {
  4527. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  4528. (__v8si) __W,
  4529. (__mmask8)
  4530. __U);
  4531. }
  4532. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4533. _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
  4534. {
  4535. return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
  4536. (__v8si)
  4537. _mm256_setzero_si256 (),
  4538. (__mmask8)
  4539. __U);
  4540. }
  4541. static __inline__ void __DEFAULT_FN_ATTRS
  4542. _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
  4543. {
  4544. __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
  4545. (__v4si) __A,
  4546. (__mmask8) __U);
  4547. }
  4548. static __inline__ void __DEFAULT_FN_ATTRS
  4549. _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
  4550. {
  4551. __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
  4552. (__v8si) __A,
  4553. (__mmask8) __U);
  4554. }
  4555. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4556. _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  4557. {
  4558. return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
  4559. (__v2di) __A,
  4560. (__v2di) __W);
  4561. }
  4562. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4563. _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
  4564. {
  4565. return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
  4566. (__v2di) __A,
  4567. (__v2di) _mm_setzero_di ());
  4568. }
  4569. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4570. _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  4571. {
  4572. return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
  4573. (__v4di) __A,
  4574. (__v4di) __W);
  4575. }
  4576. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4577. _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
  4578. {
  4579. return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
  4580. (__v4di) __A,
  4581. (__v4di) _mm256_setzero_si256 ());
  4582. }
  4583. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4584. _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  4585. {
  4586. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  4587. (__v2di) __W,
  4588. (__mmask8)
  4589. __U);
  4590. }
  4591. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4592. _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
  4593. {
  4594. return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
  4595. (__v2di)
  4596. _mm_setzero_di (),
  4597. (__mmask8)
  4598. __U);
  4599. }
  4600. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4601. _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  4602. {
  4603. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  4604. (__v4di) __W,
  4605. (__mmask8)
  4606. __U);
  4607. }
  4608. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4609. _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
  4610. {
  4611. return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
  4612. (__v4di)
  4613. _mm256_setzero_si256 (),
  4614. (__mmask8)
  4615. __U);
  4616. }
  4617. static __inline__ void __DEFAULT_FN_ATTRS
  4618. _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
  4619. {
  4620. __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
  4621. (__v2di) __A,
  4622. (__mmask8) __U);
  4623. }
  4624. static __inline__ void __DEFAULT_FN_ATTRS
  4625. _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
  4626. {
  4627. __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
  4628. (__v4di) __A,
  4629. (__mmask8) __U);
  4630. }
  4631. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4632. _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
  4633. {
  4634. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  4635. (__v2df)_mm_movedup_pd(__A),
  4636. (__v2df)__W);
  4637. }
  4638. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4639. _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
  4640. {
  4641. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  4642. (__v2df)_mm_movedup_pd(__A),
  4643. (__v2df)_mm_setzero_pd());
  4644. }
  4645. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4646. _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
  4647. {
  4648. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  4649. (__v4df)_mm256_movedup_pd(__A),
  4650. (__v4df)__W);
  4651. }
  4652. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4653. _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
  4654. {
  4655. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  4656. (__v4df)_mm256_movedup_pd(__A),
  4657. (__v4df)_mm256_setzero_pd());
  4658. }
  4659. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4660. _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
  4661. {
  4662. return (__m128i)__builtin_ia32_selectd_128(__M,
  4663. (__v4si) _mm_set1_epi32(__A),
  4664. (__v4si)__O);
  4665. }
  4666. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4667. _mm_maskz_set1_epi32( __mmask8 __M, int __A)
  4668. {
  4669. return (__m128i)__builtin_ia32_selectd_128(__M,
  4670. (__v4si) _mm_set1_epi32(__A),
  4671. (__v4si)_mm_setzero_si128());
  4672. }
  4673. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4674. _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
  4675. {
  4676. return (__m256i)__builtin_ia32_selectd_256(__M,
  4677. (__v8si) _mm256_set1_epi32(__A),
  4678. (__v8si)__O);
  4679. }
  4680. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4681. _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
  4682. {
  4683. return (__m256i)__builtin_ia32_selectd_256(__M,
  4684. (__v8si) _mm256_set1_epi32(__A),
  4685. (__v8si)_mm256_setzero_si256());
  4686. }
  4687. #ifdef __x86_64__
  4688. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4689. _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
  4690. {
  4691. return (__m128i) __builtin_ia32_selectq_128(__M,
  4692. (__v2di) _mm_set1_epi64x(__A),
  4693. (__v2di) __O);
  4694. }
  4695. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4696. _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
  4697. {
  4698. return (__m128i) __builtin_ia32_selectq_128(__M,
  4699. (__v2di) _mm_set1_epi64x(__A),
  4700. (__v2di) _mm_setzero_si128());
  4701. }
  4702. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4703. _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
  4704. {
  4705. return (__m256i) __builtin_ia32_selectq_256(__M,
  4706. (__v4di) _mm256_set1_epi64x(__A),
  4707. (__v4di) __O) ;
  4708. }
  4709. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4710. _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
  4711. {
  4712. return (__m256i) __builtin_ia32_selectq_256(__M,
  4713. (__v4di) _mm256_set1_epi64x(__A),
  4714. (__v4di) _mm256_setzero_si256());
  4715. }
  4716. #endif
  4717. #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  4718. (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
  4719. (__v2df)(__m128d)(B), \
  4720. (__v2di)(__m128i)(C), (int)(imm), \
  4721. (__mmask8)-1); })
  4722. #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  4723. (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
  4724. (__v2df)(__m128d)(B), \
  4725. (__v2di)(__m128i)(C), (int)(imm), \
  4726. (__mmask8)(U)); })
  4727. #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  4728. (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
  4729. (__v2df)(__m128d)(B), \
  4730. (__v2di)(__m128i)(C), \
  4731. (int)(imm), (__mmask8)(U)); })
  4732. #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  4733. (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
  4734. (__v4df)(__m256d)(B), \
  4735. (__v4di)(__m256i)(C), (int)(imm), \
  4736. (__mmask8)-1); })
  4737. #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  4738. (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
  4739. (__v4df)(__m256d)(B), \
  4740. (__v4di)(__m256i)(C), (int)(imm), \
  4741. (__mmask8)(U)); })
  4742. #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  4743. (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
  4744. (__v4df)(__m256d)(B), \
  4745. (__v4di)(__m256i)(C), \
  4746. (int)(imm), (__mmask8)(U)); })
  4747. #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  4748. (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
  4749. (__v4sf)(__m128)(B), \
  4750. (__v4si)(__m128i)(C), (int)(imm), \
  4751. (__mmask8)-1); })
  4752. #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  4753. (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
  4754. (__v4sf)(__m128)(B), \
  4755. (__v4si)(__m128i)(C), (int)(imm), \
  4756. (__mmask8)(U)); })
  4757. #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  4758. (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
  4759. (__v4sf)(__m128)(B), \
  4760. (__v4si)(__m128i)(C), (int)(imm), \
  4761. (__mmask8)(U)); })
  4762. #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  4763. (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
  4764. (__v8sf)(__m256)(B), \
  4765. (__v8si)(__m256i)(C), (int)(imm), \
  4766. (__mmask8)-1); })
  4767. #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  4768. (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
  4769. (__v8sf)(__m256)(B), \
  4770. (__v8si)(__m256i)(C), (int)(imm), \
  4771. (__mmask8)(U)); })
  4772. #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  4773. (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
  4774. (__v8sf)(__m256)(B), \
  4775. (__v8si)(__m256i)(C), (int)(imm), \
  4776. (__mmask8)(U)); })
  4777. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4778. _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
  4779. {
  4780. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  4781. (__v2df) __W,
  4782. (__mmask8) __U);
  4783. }
  4784. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4785. _mm_maskz_load_pd (__mmask8 __U, void const *__P)
  4786. {
  4787. return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
  4788. (__v2df)
  4789. _mm_setzero_pd (),
  4790. (__mmask8) __U);
  4791. }
  4792. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4793. _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
  4794. {
  4795. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  4796. (__v4df) __W,
  4797. (__mmask8) __U);
  4798. }
  4799. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4800. _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
  4801. {
  4802. return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
  4803. (__v4df)
  4804. _mm256_setzero_pd (),
  4805. (__mmask8) __U);
  4806. }
  4807. static __inline__ __m128 __DEFAULT_FN_ATTRS
  4808. _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
  4809. {
  4810. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  4811. (__v4sf) __W,
  4812. (__mmask8) __U);
  4813. }
  4814. static __inline__ __m128 __DEFAULT_FN_ATTRS
  4815. _mm_maskz_load_ps (__mmask8 __U, void const *__P)
  4816. {
  4817. return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
  4818. (__v4sf)
  4819. _mm_setzero_ps (),
  4820. (__mmask8) __U);
  4821. }
  4822. static __inline__ __m256 __DEFAULT_FN_ATTRS
  4823. _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
  4824. {
  4825. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  4826. (__v8sf) __W,
  4827. (__mmask8) __U);
  4828. }
  4829. static __inline__ __m256 __DEFAULT_FN_ATTRS
  4830. _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
  4831. {
  4832. return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
  4833. (__v8sf)
  4834. _mm256_setzero_ps (),
  4835. (__mmask8) __U);
  4836. }
  4837. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4838. _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
  4839. {
  4840. return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
  4841. (__v2di) __W,
  4842. (__mmask8) __U);
  4843. }
  4844. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4845. _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  4846. {
  4847. return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
  4848. (__v2di)
  4849. _mm_setzero_si128 (),
  4850. (__mmask8) __U);
  4851. }
  4852. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4853. _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
  4854. {
  4855. return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
  4856. (__v4di) __W,
  4857. (__mmask8) __U);
  4858. }
  4859. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4860. _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
  4861. {
  4862. return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
  4863. (__v4di)
  4864. _mm256_setzero_si256 (),
  4865. (__mmask8) __U);
  4866. }
  4867. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4868. _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
  4869. {
  4870. return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
  4871. (__v4si) __W,
  4872. (__mmask8) __U);
  4873. }
  4874. static __inline__ __m128i __DEFAULT_FN_ATTRS
  4875. _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  4876. {
  4877. return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
  4878. (__v4si)
  4879. _mm_setzero_si128 (),
  4880. (__mmask8) __U);
  4881. }
  4882. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4883. _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
  4884. {
  4885. return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
  4886. (__v8si) __W,
  4887. (__mmask8) __U);
  4888. }
  4889. static __inline__ __m256i __DEFAULT_FN_ATTRS
  4890. _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
  4891. {
  4892. return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
  4893. (__v8si)
  4894. _mm256_setzero_si256 (),
  4895. (__mmask8) __U);
  4896. }
  4897. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4898. _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
  4899. {
  4900. return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
  4901. (__v2df) __W,
  4902. (__mmask8) __U);
  4903. }
  4904. static __inline__ __m128d __DEFAULT_FN_ATTRS
  4905. _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
  4906. {
  4907. return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
  4908. (__v2df)
  4909. _mm_setzero_pd (),
  4910. (__mmask8) __U);
  4911. }
  4912. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4913. _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
  4914. {
  4915. return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
  4916. (__v4df) __W,
  4917. (__mmask8) __U);
  4918. }
  4919. static __inline__ __m256d __DEFAULT_FN_ATTRS
  4920. _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
  4921. {
  4922. return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
  4923. (__v4df)
  4924. _mm256_setzero_pd (),
  4925. (__mmask8) __U);
  4926. }
  4927. static __inline__ __m128 __DEFAULT_FN_ATTRS
  4928. _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
  4929. {
  4930. return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
  4931. (__v4sf) __W,
  4932. (__mmask8) __U);
  4933. }
  4934. static __inline__ __m128 __DEFAULT_FN_ATTRS
  4935. _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
  4936. {
  4937. return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
  4938. (__v4sf)
  4939. _mm_setzero_ps (),
  4940. (__mmask8) __U);
  4941. }
  4942. static __inline__ __m256 __DEFAULT_FN_ATTRS
  4943. _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
  4944. {
  4945. return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
  4946. (__v8sf) __W,
  4947. (__mmask8) __U);
  4948. }
  4949. static __inline__ __m256 __DEFAULT_FN_ATTRS
  4950. _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
  4951. {
  4952. return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
  4953. (__v8sf)
  4954. _mm256_setzero_ps (),
  4955. (__mmask8) __U);
  4956. }
  4957. static __inline__ void __DEFAULT_FN_ATTRS
  4958. _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
  4959. {
  4960. __builtin_ia32_storeapd128_mask ((__v2df *) __P,
  4961. (__v2df) __A,
  4962. (__mmask8) __U);
  4963. }
  4964. static __inline__ void __DEFAULT_FN_ATTRS
  4965. _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
  4966. {
  4967. __builtin_ia32_storeapd256_mask ((__v4df *) __P,
  4968. (__v4df) __A,
  4969. (__mmask8) __U);
  4970. }
  4971. static __inline__ void __DEFAULT_FN_ATTRS
  4972. _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
  4973. {
  4974. __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
  4975. (__v4sf) __A,
  4976. (__mmask8) __U);
  4977. }
  4978. static __inline__ void __DEFAULT_FN_ATTRS
  4979. _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
  4980. {
  4981. __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
  4982. (__v8sf) __A,
  4983. (__mmask8) __U);
  4984. }
  4985. static __inline__ void __DEFAULT_FN_ATTRS
  4986. _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
  4987. {
  4988. __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
  4989. (__v2di) __A,
  4990. (__mmask8) __U);
  4991. }
  4992. static __inline__ void __DEFAULT_FN_ATTRS
  4993. _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
  4994. {
  4995. __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
  4996. (__v4di) __A,
  4997. (__mmask8) __U);
  4998. }
  4999. static __inline__ void __DEFAULT_FN_ATTRS
  5000. _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
  5001. {
  5002. __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
  5003. (__v4si) __A,
  5004. (__mmask8) __U);
  5005. }
  5006. static __inline__ void __DEFAULT_FN_ATTRS
  5007. _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
  5008. {
  5009. __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
  5010. (__v8si) __A,
  5011. (__mmask8) __U);
  5012. }
  5013. static __inline__ void __DEFAULT_FN_ATTRS
  5014. _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
  5015. {
  5016. __builtin_ia32_storeupd128_mask ((__v2df *) __P,
  5017. (__v2df) __A,
  5018. (__mmask8) __U);
  5019. }
  5020. static __inline__ void __DEFAULT_FN_ATTRS
  5021. _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
  5022. {
  5023. __builtin_ia32_storeupd256_mask ((__v4df *) __P,
  5024. (__v4df) __A,
  5025. (__mmask8) __U);
  5026. }
  5027. static __inline__ void __DEFAULT_FN_ATTRS
  5028. _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
  5029. {
  5030. __builtin_ia32_storeups128_mask ((__v4sf *) __P,
  5031. (__v4sf) __A,
  5032. (__mmask8) __U);
  5033. }
  5034. static __inline__ void __DEFAULT_FN_ATTRS
  5035. _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
  5036. {
  5037. __builtin_ia32_storeups256_mask ((__v8sf *) __P,
  5038. (__v8sf) __A,
  5039. (__mmask8) __U);
  5040. }
  5041. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5042. _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  5043. {
  5044. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5045. (__v2df)_mm_unpackhi_pd(__A, __B),
  5046. (__v2df)__W);
  5047. }
  5048. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5049. _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
  5050. {
  5051. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5052. (__v2df)_mm_unpackhi_pd(__A, __B),
  5053. (__v2df)_mm_setzero_pd());
  5054. }
  5055. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5056. _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
  5057. {
  5058. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5059. (__v4df)_mm256_unpackhi_pd(__A, __B),
  5060. (__v4df)__W);
  5061. }
  5062. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5063. _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
  5064. {
  5065. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5066. (__v4df)_mm256_unpackhi_pd(__A, __B),
  5067. (__v4df)_mm256_setzero_pd());
  5068. }
  5069. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5070. _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  5071. {
  5072. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5073. (__v4sf)_mm_unpackhi_ps(__A, __B),
  5074. (__v4sf)__W);
  5075. }
  5076. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5077. _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
  5078. {
  5079. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5080. (__v4sf)_mm_unpackhi_ps(__A, __B),
  5081. (__v4sf)_mm_setzero_ps());
  5082. }
  5083. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5084. _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  5085. {
  5086. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5087. (__v8sf)_mm256_unpackhi_ps(__A, __B),
  5088. (__v8sf)__W);
  5089. }
  5090. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5091. _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
  5092. {
  5093. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5094. (__v8sf)_mm256_unpackhi_ps(__A, __B),
  5095. (__v8sf)_mm256_setzero_ps());
  5096. }
  5097. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5098. _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
  5099. {
  5100. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5101. (__v2df)_mm_unpacklo_pd(__A, __B),
  5102. (__v2df)__W);
  5103. }
  5104. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5105. _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
  5106. {
  5107. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5108. (__v2df)_mm_unpacklo_pd(__A, __B),
  5109. (__v2df)_mm_setzero_pd());
  5110. }
  5111. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5112. _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
  5113. {
  5114. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5115. (__v4df)_mm256_unpacklo_pd(__A, __B),
  5116. (__v4df)__W);
  5117. }
  5118. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5119. _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
  5120. {
  5121. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5122. (__v4df)_mm256_unpacklo_pd(__A, __B),
  5123. (__v4df)_mm256_setzero_pd());
  5124. }
  5125. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5126. _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
  5127. {
  5128. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5129. (__v4sf)_mm_unpacklo_ps(__A, __B),
  5130. (__v4sf)__W);
  5131. }
  5132. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5133. _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
  5134. {
  5135. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5136. (__v4sf)_mm_unpacklo_ps(__A, __B),
  5137. (__v4sf)_mm_setzero_ps());
  5138. }
  5139. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5140. _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
  5141. {
  5142. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5143. (__v8sf)_mm256_unpacklo_ps(__A, __B),
  5144. (__v8sf)__W);
  5145. }
  5146. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5147. _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
  5148. {
  5149. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5150. (__v8sf)_mm256_unpacklo_ps(__A, __B),
  5151. (__v8sf)_mm256_setzero_ps());
  5152. }
  5153. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5154. _mm_rcp14_pd (__m128d __A)
  5155. {
  5156. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  5157. (__v2df)
  5158. _mm_setzero_pd (),
  5159. (__mmask8) -1);
  5160. }
  5161. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5162. _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5163. {
  5164. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  5165. (__v2df) __W,
  5166. (__mmask8) __U);
  5167. }
  5168. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5169. _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
  5170. {
  5171. return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
  5172. (__v2df)
  5173. _mm_setzero_pd (),
  5174. (__mmask8) __U);
  5175. }
  5176. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5177. _mm256_rcp14_pd (__m256d __A)
  5178. {
  5179. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  5180. (__v4df)
  5181. _mm256_setzero_pd (),
  5182. (__mmask8) -1);
  5183. }
  5184. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5185. _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5186. {
  5187. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  5188. (__v4df) __W,
  5189. (__mmask8) __U);
  5190. }
  5191. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5192. _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
  5193. {
  5194. return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
  5195. (__v4df)
  5196. _mm256_setzero_pd (),
  5197. (__mmask8) __U);
  5198. }
  5199. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5200. _mm_rcp14_ps (__m128 __A)
  5201. {
  5202. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  5203. (__v4sf)
  5204. _mm_setzero_ps (),
  5205. (__mmask8) -1);
  5206. }
  5207. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5208. _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5209. {
  5210. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  5211. (__v4sf) __W,
  5212. (__mmask8) __U);
  5213. }
  5214. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5215. _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
  5216. {
  5217. return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
  5218. (__v4sf)
  5219. _mm_setzero_ps (),
  5220. (__mmask8) __U);
  5221. }
  5222. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5223. _mm256_rcp14_ps (__m256 __A)
  5224. {
  5225. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  5226. (__v8sf)
  5227. _mm256_setzero_ps (),
  5228. (__mmask8) -1);
  5229. }
  5230. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5231. _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5232. {
  5233. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  5234. (__v8sf) __W,
  5235. (__mmask8) __U);
  5236. }
  5237. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5238. _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
  5239. {
  5240. return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
  5241. (__v8sf)
  5242. _mm256_setzero_ps (),
  5243. (__mmask8) __U);
  5244. }
  5245. #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  5246. (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
  5247. (__v2df)_mm_permute_pd((X), (C)), \
  5248. (__v2df)(__m128d)(W)); })
  5249. #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  5250. (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
  5251. (__v2df)_mm_permute_pd((X), (C)), \
  5252. (__v2df)_mm_setzero_pd()); })
  5253. #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  5254. (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
  5255. (__v4df)_mm256_permute_pd((X), (C)), \
  5256. (__v4df)(__m256d)(W)); })
  5257. #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  5258. (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
  5259. (__v4df)_mm256_permute_pd((X), (C)), \
  5260. (__v4df)_mm256_setzero_pd()); })
  5261. #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  5262. (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
  5263. (__v4sf)_mm_permute_ps((X), (C)), \
  5264. (__v4sf)(__m128)(W)); })
  5265. #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  5266. (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
  5267. (__v4sf)_mm_permute_ps((X), (C)), \
  5268. (__v4sf)_mm_setzero_ps()); })
  5269. #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  5270. (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
  5271. (__v8sf)_mm256_permute_ps((X), (C)), \
  5272. (__v8sf)(__m256)(W)); })
  5273. #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  5274. (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
  5275. (__v8sf)_mm256_permute_ps((X), (C)), \
  5276. (__v8sf)_mm256_setzero_ps()); })
  5277. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5278. _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
  5279. {
  5280. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5281. (__v2df)_mm_permutevar_pd(__A, __C),
  5282. (__v2df)__W);
  5283. }
  5284. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5285. _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
  5286. {
  5287. return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  5288. (__v2df)_mm_permutevar_pd(__A, __C),
  5289. (__v2df)_mm_setzero_pd());
  5290. }
  5291. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5292. _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
  5293. {
  5294. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5295. (__v4df)_mm256_permutevar_pd(__A, __C),
  5296. (__v4df)__W);
  5297. }
  5298. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5299. _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
  5300. {
  5301. return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  5302. (__v4df)_mm256_permutevar_pd(__A, __C),
  5303. (__v4df)_mm256_setzero_pd());
  5304. }
  5305. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5306. _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
  5307. {
  5308. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5309. (__v4sf)_mm_permutevar_ps(__A, __C),
  5310. (__v4sf)__W);
  5311. }
  5312. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5313. _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
  5314. {
  5315. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  5316. (__v4sf)_mm_permutevar_ps(__A, __C),
  5317. (__v4sf)_mm_setzero_ps());
  5318. }
  5319. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5320. _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
  5321. {
  5322. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5323. (__v8sf)_mm256_permutevar_ps(__A, __C),
  5324. (__v8sf)__W);
  5325. }
  5326. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5327. _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
  5328. {
  5329. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  5330. (__v8sf)_mm256_permutevar_ps(__A, __C),
  5331. (__v8sf)_mm256_setzero_ps());
  5332. }
  5333. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5334. _mm_test_epi32_mask (__m128i __A, __m128i __B)
  5335. {
  5336. return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
  5337. }
  5338. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5339. _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5340. {
  5341. return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
  5342. _mm_setzero_di());
  5343. }
  5344. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5345. _mm256_test_epi32_mask (__m256i __A, __m256i __B)
  5346. {
  5347. return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
  5348. _mm256_setzero_si256());
  5349. }
  5350. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5351. _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5352. {
  5353. return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
  5354. _mm256_setzero_si256());
  5355. }
  5356. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5357. _mm_test_epi64_mask (__m128i __A, __m128i __B)
  5358. {
  5359. return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
  5360. }
  5361. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5362. _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5363. {
  5364. return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
  5365. _mm_setzero_di());
  5366. }
  5367. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5368. _mm256_test_epi64_mask (__m256i __A, __m256i __B)
  5369. {
  5370. return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
  5371. _mm256_setzero_si256());
  5372. }
  5373. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5374. _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5375. {
  5376. return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
  5377. _mm256_setzero_si256());
  5378. }
  5379. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5380. _mm_testn_epi32_mask (__m128i __A, __m128i __B)
  5381. {
  5382. return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
  5383. }
  5384. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5385. _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5386. {
  5387. return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
  5388. _mm_setzero_di());
  5389. }
  5390. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5391. _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
  5392. {
  5393. return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
  5394. _mm256_setzero_si256());
  5395. }
  5396. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5397. _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5398. {
  5399. return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
  5400. _mm256_setzero_si256());
  5401. }
  5402. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5403. _mm_testn_epi64_mask (__m128i __A, __m128i __B)
  5404. {
  5405. return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
  5406. }
  5407. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5408. _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
  5409. {
  5410. return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
  5411. _mm_setzero_di());
  5412. }
  5413. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5414. _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
  5415. {
  5416. return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
  5417. _mm256_setzero_si256());
  5418. }
  5419. static __inline__ __mmask8 __DEFAULT_FN_ATTRS
  5420. _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
  5421. {
  5422. return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
  5423. _mm256_setzero_si256());
  5424. }
  5425. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5426. _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5427. {
  5428. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5429. (__v4si)_mm_unpackhi_epi32(__A, __B),
  5430. (__v4si)__W);
  5431. }
  5432. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5433. _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  5434. {
  5435. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5436. (__v4si)_mm_unpackhi_epi32(__A, __B),
  5437. (__v4si)_mm_setzero_si128());
  5438. }
  5439. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5440. _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  5441. {
  5442. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5443. (__v8si)_mm256_unpackhi_epi32(__A, __B),
  5444. (__v8si)__W);
  5445. }
  5446. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5447. _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  5448. {
  5449. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5450. (__v8si)_mm256_unpackhi_epi32(__A, __B),
  5451. (__v8si)_mm256_setzero_si256());
  5452. }
  5453. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5454. _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5455. {
  5456. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  5457. (__v2di)_mm_unpackhi_epi64(__A, __B),
  5458. (__v2di)__W);
  5459. }
  5460. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5461. _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  5462. {
  5463. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  5464. (__v2di)_mm_unpackhi_epi64(__A, __B),
  5465. (__v2di)_mm_setzero_di());
  5466. }
  5467. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5468. _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  5469. {
  5470. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  5471. (__v4di)_mm256_unpackhi_epi64(__A, __B),
  5472. (__v4di)__W);
  5473. }
  5474. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5475. _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  5476. {
  5477. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  5478. (__v4di)_mm256_unpackhi_epi64(__A, __B),
  5479. (__v4di)_mm256_setzero_si256());
  5480. }
  5481. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5482. _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5483. {
  5484. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5485. (__v4si)_mm_unpacklo_epi32(__A, __B),
  5486. (__v4si)__W);
  5487. }
  5488. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5489. _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  5490. {
  5491. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5492. (__v4si)_mm_unpacklo_epi32(__A, __B),
  5493. (__v4si)_mm_setzero_si128());
  5494. }
  5495. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5496. _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  5497. {
  5498. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5499. (__v8si)_mm256_unpacklo_epi32(__A, __B),
  5500. (__v8si)__W);
  5501. }
  5502. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5503. _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
  5504. {
  5505. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5506. (__v8si)_mm256_unpacklo_epi32(__A, __B),
  5507. (__v8si)_mm256_setzero_si256());
  5508. }
  5509. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5510. _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5511. {
  5512. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  5513. (__v2di)_mm_unpacklo_epi64(__A, __B),
  5514. (__v2di)__W);
  5515. }
  5516. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5517. _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  5518. {
  5519. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  5520. (__v2di)_mm_unpacklo_epi64(__A, __B),
  5521. (__v2di)_mm_setzero_di());
  5522. }
  5523. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5524. _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
  5525. {
  5526. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  5527. (__v4di)_mm256_unpacklo_epi64(__A, __B),
  5528. (__v4di)__W);
  5529. }
  5530. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5531. _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
  5532. {
  5533. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  5534. (__v4di)_mm256_unpacklo_epi64(__A, __B),
  5535. (__v4di)_mm256_setzero_si256());
  5536. }
  5537. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5538. _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5539. {
  5540. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5541. (__v4si)_mm_sra_epi32(__A, __B),
  5542. (__v4si)__W);
  5543. }
  5544. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5545. _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
  5546. {
  5547. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5548. (__v4si)_mm_sra_epi32(__A, __B),
  5549. (__v4si)_mm_setzero_si128());
  5550. }
  5551. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5552. _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  5553. {
  5554. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5555. (__v8si)_mm256_sra_epi32(__A, __B),
  5556. (__v8si)__W);
  5557. }
  5558. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5559. _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
  5560. {
  5561. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5562. (__v8si)_mm256_sra_epi32(__A, __B),
  5563. (__v8si)_mm256_setzero_si256());
  5564. }
  5565. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5566. _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
  5567. {
  5568. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5569. (__v4si)_mm_srai_epi32(__A, __B),
  5570. (__v4si)__W);
  5571. }
  5572. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5573. _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
  5574. {
  5575. return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  5576. (__v4si)_mm_srai_epi32(__A, __B),
  5577. (__v4si)_mm_setzero_si128());
  5578. }
  5579. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5580. _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
  5581. {
  5582. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5583. (__v8si)_mm256_srai_epi32(__A, __B),
  5584. (__v8si)__W);
  5585. }
  5586. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5587. _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
  5588. {
  5589. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  5590. (__v8si)_mm256_srai_epi32(__A, __B),
  5591. (__v8si)_mm256_setzero_si256());
  5592. }
  5593. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5594. _mm_sra_epi64(__m128i __A, __m128i __B)
  5595. {
  5596. return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
  5597. }
  5598. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5599. _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
  5600. {
  5601. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
  5602. (__v2di)_mm_sra_epi64(__A, __B), \
  5603. (__v2di)__W);
  5604. }
  5605. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5606. _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
  5607. {
  5608. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
  5609. (__v2di)_mm_sra_epi64(__A, __B), \
  5610. (__v2di)_mm_setzero_di());
  5611. }
  5612. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5613. _mm256_sra_epi64(__m256i __A, __m128i __B)
  5614. {
  5615. return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
  5616. }
  5617. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5618. _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
  5619. {
  5620. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
  5621. (__v4di)_mm256_sra_epi64(__A, __B), \
  5622. (__v4di)__W);
  5623. }
  5624. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5625. _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
  5626. {
  5627. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
  5628. (__v4di)_mm256_sra_epi64(__A, __B), \
  5629. (__v4di)_mm256_setzero_si256());
  5630. }
  5631. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5632. _mm_srai_epi64(__m128i __A, int __imm)
  5633. {
  5634. return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
  5635. }
  5636. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5637. _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
  5638. {
  5639. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
  5640. (__v2di)_mm_srai_epi64(__A, __imm), \
  5641. (__v2di)__W);
  5642. }
  5643. static __inline__ __m128i __DEFAULT_FN_ATTRS
  5644. _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
  5645. {
  5646. return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
  5647. (__v2di)_mm_srai_epi64(__A, __imm), \
  5648. (__v2di)_mm_setzero_di());
  5649. }
  5650. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5651. _mm256_srai_epi64(__m256i __A, int __imm)
  5652. {
  5653. return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
  5654. }
  5655. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5656. _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
  5657. {
  5658. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
  5659. (__v4di)_mm256_srai_epi64(__A, __imm), \
  5660. (__v4di)__W);
  5661. }
  5662. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5663. _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
  5664. {
  5665. return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
  5666. (__v4di)_mm256_srai_epi64(__A, __imm), \
  5667. (__v4di)_mm256_setzero_si256());
  5668. }
  /* Forwards A, B, C (as 32-bit-lane vectors) and imm to
   * __builtin_ia32_pternlogd128_mask with an all-ones mask. */
  #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogd128_mask( \
        (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
        (int)(imm), (__mmask8)-1); })
  /* Same builtin call as _mm_ternarylogic_epi32, with the caller's mask U. */
  #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogd128_mask( \
        (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 32-bit-lane vectors), imm and the mask U to the
   * _maskz builtin __builtin_ia32_pternlogd128_maskz. */
  #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogd128_maskz( \
        (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 32-bit-lane vectors) and imm to
   * __builtin_ia32_pternlogd256_mask with an all-ones mask. */
  #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogd256_mask( \
        (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
        (int)(imm), (__mmask8)-1); })
  /* Same builtin call as _mm256_ternarylogic_epi32, with the caller's mask U. */
  #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogd256_mask( \
        (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 32-bit-lane vectors), imm and the mask U to the
   * _maskz builtin __builtin_ia32_pternlogd256_maskz. */
  #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogd256_maskz( \
        (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 64-bit-lane vectors) and imm to
   * __builtin_ia32_pternlogq128_mask with an all-ones mask. */
  #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogq128_mask( \
        (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
        (int)(imm), (__mmask8)-1); })
  /* Same builtin call as _mm_ternarylogic_epi64, with the caller's mask U. */
  #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogq128_mask( \
        (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 64-bit-lane vectors), imm and the mask U to the
   * _maskz builtin __builtin_ia32_pternlogq128_maskz. */
  #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_pternlogq128_maskz( \
        (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 64-bit-lane vectors) and imm to
   * __builtin_ia32_pternlogq256_mask with an all-ones mask. */
  #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogq256_mask( \
        (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
        (int)(imm), (__mmask8)-1); })
  /* Same builtin call as _mm256_ternarylogic_epi64, with the caller's mask U. */
  #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogq256_mask( \
        (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Forwards A, B, C (as 64-bit-lane vectors), imm and the mask U to the
   * _maskz builtin __builtin_ia32_pternlogq256_maskz. */
  #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_pternlogq256_maskz( \
        (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \
        (int)(imm), (__mmask8)(U)); })
  /* Shuffles 128-bit groups of floats: result elements 0-3 are A[0..3] or
   * A[4..7] depending on bit 0 of imm; result elements 4-7 are B[0..3] or
   * B[4..7] depending on bit 1 of imm. */
  #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
    (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                    (__v8sf)(__m256)(B), \
                                    0 + (((imm) & 0x1) * 4), \
                                    1 + (((imm) & 0x1) * 4), \
                                    2 + (((imm) & 0x1) * 4), \
                                    3 + (((imm) & 0x1) * 4), \
                                    8 + ((((imm) >> 1) & 0x1) * 4), \
                                    9 + ((((imm) >> 1) & 0x1) * 4), \
                                    10 + ((((imm) >> 1) & 0x1) * 4), \
                                    11 + ((((imm) >> 1) & 0x1) * 4)); })
  /* Masked _mm256_shuffle_f32x4: per-element select under U between the
   * shuffle result and W. */
  #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
        (__v8sf)(__m256)(W)); })
  /* Zero-masked _mm256_shuffle_f32x4: per-element select under U between the
   * shuffle result and _mm256_setzero_ps(). */
  #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
        (__v8sf)_mm256_setzero_ps()); })
  /* Shuffles 128-bit groups of doubles: result elements 0-1 are A[0..1] or
   * A[2..3] depending on bit 0 of imm; result elements 2-3 are B[0..1] or
   * B[2..3] depending on bit 1 of imm. */
  #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
    (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                     (__v4df)(__m256d)(B), \
                                     0 + (((imm) & 0x1) * 2), \
                                     1 + (((imm) & 0x1) * 2), \
                                     4 + ((((imm) >> 1) & 0x1) * 2), \
                                     5 + ((((imm) >> 1) & 0x1) * 2)); })
  /* Masked _mm256_shuffle_f64x2: per-element select under U between the
   * shuffle result and W. W is cast as __m256d, the double-precision vector
   * type used by the other operands of this macro. */
  #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
        (__v4df)(__m256d)(W)); })
  /* Zero-masked _mm256_shuffle_f64x2: per-element select under U between the
   * shuffle result and _mm256_setzero_pd(). */
  #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
        (__v4df)_mm256_setzero_pd()); })
  /* Shuffles 128-bit groups of an integer vector. The operands are viewed as
   * four 64-bit elements, so each group is an element pair: bit 0 of imm picks
   * the pair taken from A, bit 1 of imm picks the pair taken from B. */
  #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
    (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), \
                                     0 + (((imm) & 0x1) * 2), \
                                     1 + (((imm) & 0x1) * 2), \
                                     4 + ((((imm) >> 1) & 0x1) * 2), \
                                     5 + ((((imm) >> 1) & 0x1) * 2)); })
  /* Masked _mm256_shuffle_i32x4: per-element (32-bit) select under U between
   * the shuffle result and W. W is cast as __m256i, the integer vector type
   * used by the other operands of this macro. */
  #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
        (__v8si)(__m256i)(W)); })
  /* Zero-masked _mm256_shuffle_i32x4: per-element (32-bit) select under U
   * between the shuffle result and _mm256_setzero_si256(). */
  #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
        (__v8si)_mm256_setzero_si256()); })
  /* Shuffles 128-bit groups of 64-bit integers: result elements 0-1 are
   * A[0..1] or A[2..3] depending on bit 0 of imm; result elements 2-3 are
   * B[0..1] or B[2..3] depending on bit 1 of imm. */
  #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
    (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), \
                                     0 + (((imm) & 0x1) * 2), \
                                     1 + (((imm) & 0x1) * 2), \
                                     4 + ((((imm) >> 1) & 0x1) * 2), \
                                     5 + ((((imm) >> 1) & 0x1) * 2)); })
  /* Masked _mm256_shuffle_i64x2: per-element (64-bit) select under U between
   * the shuffle result and W. W is cast as __m256i, the integer vector type
   * used by the other operands of this macro. */
  #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
        (__v4di)(__m256i)(W)); })
  /* Zero-masked _mm256_shuffle_i64x2: per-element (64-bit) select under U
   * between the shuffle result and _mm256_setzero_si256(). */
  #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
        (__v4di)_mm256_setzero_si256()); })
  /* Masked _mm_shuffle_pd: per-element select under U between the shuffle
   * result and W. */
  #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
    (__m128d)__builtin_ia32_selectpd_128( \
        (__mmask8)(U), \
        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
        (__v2df)(__m128d)(W)); })
  /* Zero-masked _mm_shuffle_pd: per-element select under U between the
   * shuffle result and _mm_setzero_pd(). */
  #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
    (__m128d)__builtin_ia32_selectpd_128( \
        (__mmask8)(U), \
        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
        (__v2df)_mm_setzero_pd()); })
  /* Masked _mm256_shuffle_pd: per-element select under U between the shuffle
   * result and W. */
  #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
        (__v4df)(__m256d)(W)); })
  /* Zero-masked _mm256_shuffle_pd: per-element select under U between the
   * shuffle result and _mm256_setzero_pd(). */
  #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
        (__v4df)_mm256_setzero_pd()); })
  /* Masked _mm_shuffle_ps: per-element select under U between the shuffle
   * result and W. */
  #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
        (__v4sf)(__m128)(W)); })
  /* Zero-masked _mm_shuffle_ps: per-element select under U between the
   * shuffle result and _mm_setzero_ps(). */
  #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
        (__v4sf)_mm_setzero_ps()); })
  /* Masked _mm256_shuffle_ps: per-element select under U between the shuffle
   * result and W. */
  #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
        (__v8sf)(__m256)(W)); })
  /* Zero-masked _mm256_shuffle_ps: per-element select under U between the
   * shuffle result and _mm256_setzero_ps(). */
  #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
        (__v8sf)_mm256_setzero_ps()); })
  5825. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5826. _mm_rsqrt14_pd (__m128d __A)
  5827. {
  5828. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  5829. (__v2df)
  5830. _mm_setzero_pd (),
  5831. (__mmask8) -1);
  5832. }
  5833. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5834. _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
  5835. {
  5836. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  5837. (__v2df) __W,
  5838. (__mmask8) __U);
  5839. }
  5840. static __inline__ __m128d __DEFAULT_FN_ATTRS
  5841. _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
  5842. {
  5843. return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
  5844. (__v2df)
  5845. _mm_setzero_pd (),
  5846. (__mmask8) __U);
  5847. }
  5848. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5849. _mm256_rsqrt14_pd (__m256d __A)
  5850. {
  5851. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  5852. (__v4df)
  5853. _mm256_setzero_pd (),
  5854. (__mmask8) -1);
  5855. }
  5856. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5857. _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
  5858. {
  5859. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  5860. (__v4df) __W,
  5861. (__mmask8) __U);
  5862. }
  5863. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5864. _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
  5865. {
  5866. return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
  5867. (__v4df)
  5868. _mm256_setzero_pd (),
  5869. (__mmask8) __U);
  5870. }
  5871. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5872. _mm_rsqrt14_ps (__m128 __A)
  5873. {
  5874. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  5875. (__v4sf)
  5876. _mm_setzero_ps (),
  5877. (__mmask8) -1);
  5878. }
  5879. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5880. _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
  5881. {
  5882. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  5883. (__v4sf) __W,
  5884. (__mmask8) __U);
  5885. }
  5886. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5887. _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
  5888. {
  5889. return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
  5890. (__v4sf)
  5891. _mm_setzero_ps (),
  5892. (__mmask8) __U);
  5893. }
  5894. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5895. _mm256_rsqrt14_ps (__m256 __A)
  5896. {
  5897. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  5898. (__v8sf)
  5899. _mm256_setzero_ps (),
  5900. (__mmask8) -1);
  5901. }
  5902. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5903. _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
  5904. {
  5905. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  5906. (__v8sf) __W,
  5907. (__mmask8) __U);
  5908. }
  5909. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5910. _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
  5911. {
  5912. return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
  5913. (__v8sf)
  5914. _mm256_setzero_ps (),
  5915. (__mmask8) __U);
  5916. }
  5917. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5918. _mm256_broadcast_f32x4(__m128 __A)
  5919. {
  5920. return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
  5921. 0, 1, 2, 3, 0, 1, 2, 3);
  5922. }
  5923. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5924. _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
  5925. {
  5926. return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
  5927. (__v8sf)_mm256_broadcast_f32x4(__A),
  5928. (__v8sf)__O);
  5929. }
  5930. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5931. _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
  5932. {
  5933. return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
  5934. (__v8sf)_mm256_broadcast_f32x4(__A),
  5935. (__v8sf)_mm256_setzero_ps());
  5936. }
  5937. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5938. _mm256_broadcast_i32x4(__m128i __A)
  5939. {
  5940. return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
  5941. 0, 1, 2, 3, 0, 1, 2, 3);
  5942. }
  5943. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5944. _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
  5945. {
  5946. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  5947. (__v8si)_mm256_broadcast_i32x4(__A),
  5948. (__v8si)__O);
  5949. }
  5950. static __inline__ __m256i __DEFAULT_FN_ATTRS
  5951. _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
  5952. {
  5953. return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  5954. (__v8si)_mm256_broadcast_i32x4(__A),
  5955. (__v8si)_mm256_setzero_si256());
  5956. }
  5957. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5958. _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
  5959. {
  5960. return (__m256d)__builtin_ia32_selectpd_256(__M,
  5961. (__v4df) _mm256_broadcastsd_pd(__A),
  5962. (__v4df) __O);
  5963. }
  5964. static __inline__ __m256d __DEFAULT_FN_ATTRS
  5965. _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
  5966. {
  5967. return (__m256d)__builtin_ia32_selectpd_256(__M,
  5968. (__v4df) _mm256_broadcastsd_pd(__A),
  5969. (__v4df) _mm256_setzero_pd());
  5970. }
  5971. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5972. _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
  5973. {
  5974. return (__m128)__builtin_ia32_selectps_128(__M,
  5975. (__v4sf) _mm_broadcastss_ps(__A),
  5976. (__v4sf) __O);
  5977. }
  5978. static __inline__ __m128 __DEFAULT_FN_ATTRS
  5979. _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  5980. {
  5981. return (__m128)__builtin_ia32_selectps_128(__M,
  5982. (__v4sf) _mm_broadcastss_ps(__A),
  5983. (__v4sf) _mm_setzero_ps());
  5984. }
  5985. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5986. _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
  5987. {
  5988. return (__m256)__builtin_ia32_selectps_256(__M,
  5989. (__v8sf) _mm256_broadcastss_ps(__A),
  5990. (__v8sf) __O);
  5991. }
  5992. static __inline__ __m256 __DEFAULT_FN_ATTRS
  5993. _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
  5994. {
  5995. return (__m256)__builtin_ia32_selectps_256(__M,
  5996. (__v8sf) _mm256_broadcastss_ps(__A),
  5997. (__v8sf) _mm256_setzero_ps());
  5998. }
  5999. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6000. _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  6001. {
  6002. return (__m128i)__builtin_ia32_selectd_128(__M,
  6003. (__v4si) _mm_broadcastd_epi32(__A),
  6004. (__v4si) __O);
  6005. }
  6006. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6007. _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  6008. {
  6009. return (__m128i)__builtin_ia32_selectd_128(__M,
  6010. (__v4si) _mm_broadcastd_epi32(__A),
  6011. (__v4si) _mm_setzero_si128());
  6012. }
  6013. static __inline__ __m256i __DEFAULT_FN_ATTRS
  6014. _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
  6015. {
  6016. return (__m256i)__builtin_ia32_selectd_256(__M,
  6017. (__v8si) _mm256_broadcastd_epi32(__A),
  6018. (__v8si) __O);
  6019. }
  6020. static __inline__ __m256i __DEFAULT_FN_ATTRS
  6021. _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
  6022. {
  6023. return (__m256i)__builtin_ia32_selectd_256(__M,
  6024. (__v8si) _mm256_broadcastd_epi32(__A),
  6025. (__v8si) _mm256_setzero_si256());
  6026. }
  6027. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6028. _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
  6029. {
  6030. return (__m128i)__builtin_ia32_selectq_128(__M,
  6031. (__v2di) _mm_broadcastq_epi64(__A),
  6032. (__v2di) __O);
  6033. }
  6034. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6035. _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  6036. {
  6037. return (__m128i)__builtin_ia32_selectq_128(__M,
  6038. (__v2di) _mm_broadcastq_epi64(__A),
  6039. (__v2di) _mm_setzero_si128());
  6040. }
  6041. static __inline__ __m256i __DEFAULT_FN_ATTRS
  6042. _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
  6043. {
  6044. return (__m256i)__builtin_ia32_selectq_256(__M,
  6045. (__v4di) _mm256_broadcastq_epi64(__A),
  6046. (__v4di) __O);
  6047. }
  6048. static __inline__ __m256i __DEFAULT_FN_ATTRS
  6049. _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
  6050. {
  6051. return (__m256i)__builtin_ia32_selectq_256(__M,
  6052. (__v4di) _mm256_broadcastq_epi64(__A),
  6053. (__v4di) _mm256_setzero_si256());
  6054. }
  6055. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6056. _mm_cvtsepi32_epi8 (__m128i __A)
  6057. {
  6058. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  6059. (__v16qi)_mm_undefined_si128(),
  6060. (__mmask8) -1);
  6061. }
  6062. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6063. _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6064. {
  6065. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  6066. (__v16qi) __O, __M);
  6067. }
  6068. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6069. _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
  6070. {
  6071. return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
  6072. (__v16qi) _mm_setzero_si128 (),
  6073. __M);
  6074. }
  6075. static __inline__ void __DEFAULT_FN_ATTRS
  6076. _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6077. {
  6078. __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  6079. }
  6080. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6081. _mm256_cvtsepi32_epi8 (__m256i __A)
  6082. {
  6083. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  6084. (__v16qi)_mm_undefined_si128(),
  6085. (__mmask8) -1);
  6086. }
  6087. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6088. _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6089. {
  6090. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  6091. (__v16qi) __O, __M);
  6092. }
  6093. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6094. _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
  6095. {
  6096. return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
  6097. (__v16qi) _mm_setzero_si128 (),
  6098. __M);
  6099. }
  6100. static __inline__ void __DEFAULT_FN_ATTRS
  6101. _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6102. {
  6103. __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
  6104. }
  6105. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6106. _mm_cvtsepi32_epi16 (__m128i __A)
  6107. {
  6108. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  6109. (__v8hi)_mm_setzero_si128 (),
  6110. (__mmask8) -1);
  6111. }
  6112. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6113. _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6114. {
  6115. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  6116. (__v8hi)__O,
  6117. __M);
  6118. }
  6119. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6120. _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
  6121. {
  6122. return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
  6123. (__v8hi) _mm_setzero_si128 (),
  6124. __M);
  6125. }
  6126. static __inline__ void __DEFAULT_FN_ATTRS
  6127. _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6128. {
  6129. __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  6130. }
  6131. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6132. _mm256_cvtsepi32_epi16 (__m256i __A)
  6133. {
  6134. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  6135. (__v8hi)_mm_undefined_si128(),
  6136. (__mmask8) -1);
  6137. }
  6138. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6139. _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6140. {
  6141. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  6142. (__v8hi) __O, __M);
  6143. }
  6144. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6145. _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
  6146. {
  6147. return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
  6148. (__v8hi) _mm_setzero_si128 (),
  6149. __M);
  6150. }
  6151. static __inline__ void __DEFAULT_FN_ATTRS
  6152. _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6153. {
  6154. __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  6155. }
  6156. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6157. _mm_cvtsepi64_epi8 (__m128i __A)
  6158. {
  6159. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  6160. (__v16qi)_mm_undefined_si128(),
  6161. (__mmask8) -1);
  6162. }
  6163. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6164. _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6165. {
  6166. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  6167. (__v16qi) __O, __M);
  6168. }
  6169. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6170. _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
  6171. {
  6172. return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
  6173. (__v16qi) _mm_setzero_si128 (),
  6174. __M);
  6175. }
  6176. static __inline__ void __DEFAULT_FN_ATTRS
  6177. _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6178. {
  6179. __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  6180. }
  6181. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6182. _mm256_cvtsepi64_epi8 (__m256i __A)
  6183. {
  6184. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  6185. (__v16qi)_mm_undefined_si128(),
  6186. (__mmask8) -1);
  6187. }
  6188. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6189. _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6190. {
  6191. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  6192. (__v16qi) __O, __M);
  6193. }
  6194. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6195. _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
  6196. {
  6197. return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
  6198. (__v16qi) _mm_setzero_si128 (),
  6199. __M);
  6200. }
  6201. static __inline__ void __DEFAULT_FN_ATTRS
  6202. _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6203. {
  6204. __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  6205. }
  6206. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6207. _mm_cvtsepi64_epi32 (__m128i __A)
  6208. {
  6209. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  6210. (__v4si)_mm_undefined_si128(),
  6211. (__mmask8) -1);
  6212. }
  6213. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6214. _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  6215. {
  6216. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  6217. (__v4si) __O, __M);
  6218. }
  6219. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6220. _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
  6221. {
  6222. return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
  6223. (__v4si) _mm_setzero_si128 (),
  6224. __M);
  6225. }
  6226. static __inline__ void __DEFAULT_FN_ATTRS
  6227. _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  6228. {
  6229. __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  6230. }
  6231. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6232. _mm256_cvtsepi64_epi32 (__m256i __A)
  6233. {
  6234. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  6235. (__v4si)_mm_undefined_si128(),
  6236. (__mmask8) -1);
  6237. }
  6238. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6239. _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  6240. {
  6241. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  6242. (__v4si)__O,
  6243. __M);
  6244. }
  6245. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6246. _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
  6247. {
  6248. return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
  6249. (__v4si) _mm_setzero_si128 (),
  6250. __M);
  6251. }
  6252. static __inline__ void __DEFAULT_FN_ATTRS
  6253. _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  6254. {
  6255. __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  6256. }
  6257. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6258. _mm_cvtsepi64_epi16 (__m128i __A)
  6259. {
  6260. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  6261. (__v8hi)_mm_undefined_si128(),
  6262. (__mmask8) -1);
  6263. }
  6264. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6265. _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6266. {
  6267. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  6268. (__v8hi) __O, __M);
  6269. }
  6270. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6271. _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
  6272. {
  6273. return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
  6274. (__v8hi) _mm_setzero_si128 (),
  6275. __M);
  6276. }
  6277. static __inline__ void __DEFAULT_FN_ATTRS
  6278. _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6279. {
  6280. __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  6281. }
  6282. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6283. _mm256_cvtsepi64_epi16 (__m256i __A)
  6284. {
  6285. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  6286. (__v8hi)_mm_undefined_si128(),
  6287. (__mmask8) -1);
  6288. }
  6289. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6290. _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6291. {
  6292. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  6293. (__v8hi) __O, __M);
  6294. }
  6295. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6296. _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
  6297. {
  6298. return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
  6299. (__v8hi) _mm_setzero_si128 (),
  6300. __M);
  6301. }
  6302. static __inline__ void __DEFAULT_FN_ATTRS
  6303. _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6304. {
  6305. __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  6306. }
  6307. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6308. _mm_cvtusepi32_epi8 (__m128i __A)
  6309. {
  6310. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  6311. (__v16qi)_mm_undefined_si128(),
  6312. (__mmask8) -1);
  6313. }
  6314. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6315. _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6316. {
  6317. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  6318. (__v16qi) __O,
  6319. __M);
  6320. }
  6321. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6322. _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
  6323. {
  6324. return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
  6325. (__v16qi) _mm_setzero_si128 (),
  6326. __M);
  6327. }
  6328. static __inline__ void __DEFAULT_FN_ATTRS
  6329. _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6330. {
  6331. __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  6332. }
  6333. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6334. _mm256_cvtusepi32_epi8 (__m256i __A)
  6335. {
  6336. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  6337. (__v16qi)_mm_undefined_si128(),
  6338. (__mmask8) -1);
  6339. }
  6340. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6341. _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6342. {
  6343. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  6344. (__v16qi) __O,
  6345. __M);
  6346. }
  6347. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6348. _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
  6349. {
  6350. return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
  6351. (__v16qi) _mm_setzero_si128 (),
  6352. __M);
  6353. }
  6354. static __inline__ void __DEFAULT_FN_ATTRS
  6355. _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6356. {
  6357. __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
  6358. }
  6359. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6360. _mm_cvtusepi32_epi16 (__m128i __A)
  6361. {
  6362. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  6363. (__v8hi)_mm_undefined_si128(),
  6364. (__mmask8) -1);
  6365. }
  6366. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6367. _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6368. {
  6369. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  6370. (__v8hi) __O, __M);
  6371. }
  6372. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6373. _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
  6374. {
  6375. return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
  6376. (__v8hi) _mm_setzero_si128 (),
  6377. __M);
  6378. }
  6379. static __inline__ void __DEFAULT_FN_ATTRS
  6380. _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6381. {
  6382. __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  6383. }
  6384. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6385. _mm256_cvtusepi32_epi16 (__m256i __A)
  6386. {
  6387. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  6388. (__v8hi) _mm_undefined_si128(),
  6389. (__mmask8) -1);
  6390. }
  6391. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6392. _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6393. {
  6394. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  6395. (__v8hi) __O, __M);
  6396. }
  6397. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6398. _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
  6399. {
  6400. return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
  6401. (__v8hi) _mm_setzero_si128 (),
  6402. __M);
  6403. }
  6404. static __inline__ void __DEFAULT_FN_ATTRS
  6405. _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6406. {
  6407. __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  6408. }
  6409. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6410. _mm_cvtusepi64_epi8 (__m128i __A)
  6411. {
  6412. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  6413. (__v16qi)_mm_undefined_si128(),
  6414. (__mmask8) -1);
  6415. }
  6416. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6417. _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6418. {
  6419. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  6420. (__v16qi) __O,
  6421. __M);
  6422. }
  6423. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6424. _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
  6425. {
  6426. return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
  6427. (__v16qi) _mm_setzero_si128 (),
  6428. __M);
  6429. }
  6430. static __inline__ void __DEFAULT_FN_ATTRS
  6431. _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6432. {
  6433. __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  6434. }
  6435. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6436. _mm256_cvtusepi64_epi8 (__m256i __A)
  6437. {
  6438. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  6439. (__v16qi)_mm_undefined_si128(),
  6440. (__mmask8) -1);
  6441. }
  6442. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6443. _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6444. {
  6445. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  6446. (__v16qi) __O,
  6447. __M);
  6448. }
  6449. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6450. _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
  6451. {
  6452. return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
  6453. (__v16qi) _mm_setzero_si128 (),
  6454. __M);
  6455. }
  6456. static __inline__ void __DEFAULT_FN_ATTRS
  6457. _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6458. {
  6459. __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  6460. }
  6461. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6462. _mm_cvtusepi64_epi32 (__m128i __A)
  6463. {
  6464. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  6465. (__v4si)_mm_undefined_si128(),
  6466. (__mmask8) -1);
  6467. }
  6468. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6469. _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  6470. {
  6471. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  6472. (__v4si) __O, __M);
  6473. }
  6474. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6475. _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
  6476. {
  6477. return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
  6478. (__v4si) _mm_setzero_si128 (),
  6479. __M);
  6480. }
  6481. static __inline__ void __DEFAULT_FN_ATTRS
  6482. _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  6483. {
  6484. __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  6485. }
  6486. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6487. _mm256_cvtusepi64_epi32 (__m256i __A)
  6488. {
  6489. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  6490. (__v4si)_mm_undefined_si128(),
  6491. (__mmask8) -1);
  6492. }
  6493. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6494. _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  6495. {
  6496. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  6497. (__v4si) __O, __M);
  6498. }
  6499. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6500. _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
  6501. {
  6502. return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
  6503. (__v4si) _mm_setzero_si128 (),
  6504. __M);
  6505. }
  6506. static __inline__ void __DEFAULT_FN_ATTRS
  6507. _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  6508. {
  6509. __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  6510. }
  6511. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6512. _mm_cvtusepi64_epi16 (__m128i __A)
  6513. {
  6514. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  6515. (__v8hi)_mm_undefined_si128(),
  6516. (__mmask8) -1);
  6517. }
  6518. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6519. _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6520. {
  6521. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  6522. (__v8hi) __O, __M);
  6523. }
  6524. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6525. _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
  6526. {
  6527. return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
  6528. (__v8hi) _mm_setzero_si128 (),
  6529. __M);
  6530. }
  6531. static __inline__ void __DEFAULT_FN_ATTRS
  6532. _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6533. {
  6534. __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  6535. }
  6536. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6537. _mm256_cvtusepi64_epi16 (__m256i __A)
  6538. {
  6539. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  6540. (__v8hi)_mm_undefined_si128(),
  6541. (__mmask8) -1);
  6542. }
  6543. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6544. _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6545. {
  6546. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  6547. (__v8hi) __O, __M);
  6548. }
  6549. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6550. _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
  6551. {
  6552. return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
  6553. (__v8hi) _mm_setzero_si128 (),
  6554. __M);
  6555. }
  6556. static __inline__ void __DEFAULT_FN_ATTRS
  6557. _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6558. {
  6559. return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  6560. }
  6561. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6562. _mm_cvtepi32_epi8 (__m128i __A)
  6563. {
  6564. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  6565. (__v16qi)_mm_undefined_si128(),
  6566. (__mmask8) -1);
  6567. }
  6568. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6569. _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6570. {
  6571. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  6572. (__v16qi) __O, __M);
  6573. }
  6574. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6575. _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
  6576. {
  6577. return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
  6578. (__v16qi)
  6579. _mm_setzero_si128 (),
  6580. __M);
  6581. }
  6582. static __inline__ void __DEFAULT_FN_ATTRS
  6583. _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6584. {
  6585. __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
  6586. }
  6587. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6588. _mm256_cvtepi32_epi8 (__m256i __A)
  6589. {
  6590. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  6591. (__v16qi)_mm_undefined_si128(),
  6592. (__mmask8) -1);
  6593. }
  6594. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6595. _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6596. {
  6597. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  6598. (__v16qi) __O, __M);
  6599. }
  6600. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6601. _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
  6602. {
  6603. return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
  6604. (__v16qi) _mm_setzero_si128 (),
  6605. __M);
  6606. }
  6607. static __inline__ void __DEFAULT_FN_ATTRS
  6608. _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6609. {
  6610. __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
  6611. }
  6612. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6613. _mm_cvtepi32_epi16 (__m128i __A)
  6614. {
  6615. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  6616. (__v8hi) _mm_setzero_si128 (),
  6617. (__mmask8) -1);
  6618. }
  6619. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6620. _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6621. {
  6622. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  6623. (__v8hi) __O, __M);
  6624. }
  6625. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6626. _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
  6627. {
  6628. return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
  6629. (__v8hi) _mm_setzero_si128 (),
  6630. __M);
  6631. }
  6632. static __inline__ void __DEFAULT_FN_ATTRS
  6633. _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6634. {
  6635. __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
  6636. }
  6637. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6638. _mm256_cvtepi32_epi16 (__m256i __A)
  6639. {
  6640. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  6641. (__v8hi)_mm_setzero_si128 (),
  6642. (__mmask8) -1);
  6643. }
  6644. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6645. _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6646. {
  6647. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  6648. (__v8hi) __O, __M);
  6649. }
  6650. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6651. _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
  6652. {
  6653. return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
  6654. (__v8hi) _mm_setzero_si128 (),
  6655. __M);
  6656. }
  6657. static __inline__ void __DEFAULT_FN_ATTRS
  6658. _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6659. {
  6660. __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
  6661. }
  6662. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6663. _mm_cvtepi64_epi8 (__m128i __A)
  6664. {
  6665. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  6666. (__v16qi) _mm_undefined_si128(),
  6667. (__mmask8) -1);
  6668. }
  6669. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6670. _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
  6671. {
  6672. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  6673. (__v16qi) __O, __M);
  6674. }
  6675. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6676. _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
  6677. {
  6678. return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
  6679. (__v16qi) _mm_setzero_si128 (),
  6680. __M);
  6681. }
  6682. static __inline__ void __DEFAULT_FN_ATTRS
  6683. _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
  6684. {
  6685. __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
  6686. }
  6687. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6688. _mm256_cvtepi64_epi8 (__m256i __A)
  6689. {
  6690. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  6691. (__v16qi) _mm_undefined_si128(),
  6692. (__mmask8) -1);
  6693. }
  6694. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6695. _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
  6696. {
  6697. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  6698. (__v16qi) __O, __M);
  6699. }
  6700. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6701. _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
  6702. {
  6703. return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
  6704. (__v16qi) _mm_setzero_si128 (),
  6705. __M);
  6706. }
  6707. static __inline__ void __DEFAULT_FN_ATTRS
  6708. _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
  6709. {
  6710. __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
  6711. }
  6712. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6713. _mm_cvtepi64_epi32 (__m128i __A)
  6714. {
  6715. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  6716. (__v4si)_mm_undefined_si128(),
  6717. (__mmask8) -1);
  6718. }
  6719. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6720. _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
  6721. {
  6722. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  6723. (__v4si) __O, __M);
  6724. }
  6725. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6726. _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
  6727. {
  6728. return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
  6729. (__v4si) _mm_setzero_si128 (),
  6730. __M);
  6731. }
  6732. static __inline__ void __DEFAULT_FN_ATTRS
  6733. _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
  6734. {
  6735. __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
  6736. }
  6737. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6738. _mm256_cvtepi64_epi32 (__m256i __A)
  6739. {
  6740. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  6741. (__v4si) _mm_undefined_si128(),
  6742. (__mmask8) -1);
  6743. }
  6744. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6745. _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
  6746. {
  6747. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  6748. (__v4si) __O, __M);
  6749. }
  6750. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6751. _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
  6752. {
  6753. return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
  6754. (__v4si) _mm_setzero_si128 (),
  6755. __M);
  6756. }
  6757. static __inline__ void __DEFAULT_FN_ATTRS
  6758. _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
  6759. {
  6760. __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
  6761. }
  6762. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6763. _mm_cvtepi64_epi16 (__m128i __A)
  6764. {
  6765. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  6766. (__v8hi) _mm_undefined_si128(),
  6767. (__mmask8) -1);
  6768. }
  6769. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6770. _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
  6771. {
  6772. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  6773. (__v8hi)__O,
  6774. __M);
  6775. }
  6776. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6777. _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
  6778. {
  6779. return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
  6780. (__v8hi) _mm_setzero_si128 (),
  6781. __M);
  6782. }
  6783. static __inline__ void __DEFAULT_FN_ATTRS
  6784. _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
  6785. {
  6786. __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
  6787. }
  6788. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6789. _mm256_cvtepi64_epi16 (__m256i __A)
  6790. {
  6791. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  6792. (__v8hi)_mm_undefined_si128(),
  6793. (__mmask8) -1);
  6794. }
  6795. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6796. _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
  6797. {
  6798. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  6799. (__v8hi) __O, __M);
  6800. }
  6801. static __inline__ __m128i __DEFAULT_FN_ATTRS
  6802. _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
  6803. {
  6804. return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
  6805. (__v8hi) _mm_setzero_si128 (),
  6806. __M);
  6807. }
  6808. static __inline__ void __DEFAULT_FN_ATTRS
  6809. _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
  6810. {
  6811. __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
  6812. }
  /* Expands to a __builtin_shufflevector of A (as __v8sf) and an undefined
     vector, using indices 4,5,6,7 when bit 0 of imm is set and 0,1,2,3
     otherwise; the result is cast to __m128. */
  #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
    (__m128)__builtin_shufflevector( \
        (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), \
        ((imm) & 1) ? 4 : 0, ((imm) & 1) ? 5 : 1, \
        ((imm) & 1) ? 6 : 2, ((imm) & 1) ? 7 : 3); })
  /* Expands to __builtin_ia32_selectps_128 with mask U, the result of
     _mm256_extractf32x4_ps(A, imm), and W as its operands. */
  #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
        (__v4sf)(W)); })
  /* Expands to __builtin_ia32_selectps_128 with mask U, the result of
     _mm256_extractf32x4_ps(A, imm), and a zeroed vector as its operands. */
  #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
        (__v4sf)_mm_setzero_ps()); })
  /* Expands to a __builtin_shufflevector of A (as __v8si) and an undefined
     vector, using indices 4,5,6,7 when bit 0 of imm is set and 0,1,2,3
     otherwise; the result is cast to __m128i.  A is an integer vector, so it
     is cast through __m256i (not the float type __m256). */
  #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
    (__m128i)__builtin_shufflevector( \
        (__v8si)(__m256i)(A), (__v8si)_mm256_undefined_si256(), \
        ((imm) & 1) ? 4 : 0, ((imm) & 1) ? 5 : 1, \
        ((imm) & 1) ? 6 : 2, ((imm) & 1) ? 7 : 3); })
  /* Expands to __builtin_ia32_selectd_128 with mask U, the result of
     _mm256_extracti32x4_epi32(A, imm), and W as its operands. */
  #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
        (__v4si)(W)); })
  /* Expands to __builtin_ia32_selectd_128 with mask U, the result of
     _mm256_extracti32x4_epi32(A, imm), and a zeroed vector as its operands. */
  #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
    (__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
        (__v4si)_mm_setzero_si128()); })
  /* Expands to a __builtin_shufflevector of A (as __v8sf) and B widened with
     _mm256_castps128_ps256.  The index list is 0,1,2,3,8,9,10,11 when bit 0
     of imm is set and 8,9,10,11,4,5,6,7 otherwise. */
  #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
    (__m256)__builtin_shufflevector( \
        (__v8sf)(A), \
        (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
        ((imm) & 0x1) ? 0 : 8,  ((imm) & 0x1) ? 1 : 9, \
        ((imm) & 0x1) ? 2 : 10, ((imm) & 0x1) ? 3 : 11, \
        ((imm) & 0x1) ? 8 : 4,  ((imm) & 0x1) ? 9 : 5, \
        ((imm) & 0x1) ? 10 : 6, ((imm) & 0x1) ? 11 : 7); })
  /* Expands to __builtin_ia32_selectps_256 with mask U, the result of
     _mm256_insertf32x4(A, B, imm), and W as its operands. */
  #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
        (__v8sf)(W)); })
  /* Expands to __builtin_ia32_selectps_256 with mask U, the result of
     _mm256_insertf32x4(A, B, imm), and a zeroed vector as its operands. */
  #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
        (__v8sf)_mm256_setzero_ps()); })
  /* Expands to a __builtin_shufflevector of A (as __v8si) and B widened with
     _mm256_castsi128_si256.  The index list is 0,1,2,3,8,9,10,11 when bit 0
     of imm is set and 8,9,10,11,4,5,6,7 otherwise. */
  #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
    (__m256i)__builtin_shufflevector( \
        (__v8si)(A), \
        (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
        ((imm) & 0x1) ? 0 : 8,  ((imm) & 0x1) ? 1 : 9, \
        ((imm) & 0x1) ? 2 : 10, ((imm) & 0x1) ? 3 : 11, \
        ((imm) & 0x1) ? 8 : 4,  ((imm) & 0x1) ? 9 : 5, \
        ((imm) & 0x1) ? 10 : 6, ((imm) & 0x1) ? 11 : 7); })
  /* Expands to __builtin_ia32_selectd_256 with mask U, the result of
     _mm256_inserti32x4(A, B, imm), and W as its operands. */
  #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
        (__v8si)(W)); })
  /* Expands to __builtin_ia32_selectd_256 with mask U, the result of
     _mm256_inserti32x4(A, B, imm), and a zeroed vector as its operands. */
  #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
    (__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
        (__v8si)_mm256_setzero_si256()); })
  /* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and an all-ones mask. */
  #define _mm_getmant_pd(A, B, C) __extension__ ({ \
    (__m128d)__builtin_ia32_getmantpd128_mask( \
        (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
        (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
  /* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
     ((C)<<2) | (B), W as the vector operand and mask U. */
  #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
    (__m128d)__builtin_ia32_getmantpd128_mask( \
        (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
        (__v2df)(__m128d)(W), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and mask U. */
  #define _mm_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
    (__m128d)__builtin_ia32_getmantpd128_mask( \
        (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
        (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and an all-ones mask. */
  #define _mm256_getmant_pd(A, B, C) __extension__ ({ \
    (__m256d)__builtin_ia32_getmantpd256_mask( \
        (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
        (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
  /* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
     ((C)<<2) | (B), W as the vector operand and mask U. */
  #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
    (__m256d)__builtin_ia32_getmantpd256_mask( \
        (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
        (__v4df)(__m256d)(W), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and mask U. */
  #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
    (__m256d)__builtin_ia32_getmantpd256_mask( \
        (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
        (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and an all-ones mask. */
  #define _mm_getmant_ps(A, B, C) __extension__ ({ \
    (__m128)__builtin_ia32_getmantps128_mask( \
        (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
        (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
  /* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
     ((C)<<2) | (B), W as the vector operand and mask U. */
  #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
    (__m128)__builtin_ia32_getmantps128_mask( \
        (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
        (__v4sf)(__m128)(W), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and mask U. */
  #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
    (__m128)__builtin_ia32_getmantps128_mask( \
        (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
        (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and an all-ones mask. */
  #define _mm256_getmant_ps(A, B, C) __extension__ ({ \
    (__m256)__builtin_ia32_getmantps256_mask( \
        (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
        (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
  /* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
     ((C)<<2) | (B), W as the vector operand and mask U. */
  #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
    (__m256)__builtin_ia32_getmantps256_mask( \
        (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
        (__v8sf)(__m256)(W), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
     ((C)<<2) | (B), a zeroed vector and mask U. */
  #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
    (__m256)__builtin_ia32_getmantps256_mask( \
        (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
        (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
  /* Expands to __builtin_ia32_gather3div2df with v1_old (as __v2df), addr (as
     double const *), index (as __v2di), mask and scale (as int). */
  #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128d)__builtin_ia32_gather3div2df( \
        (__v2df)(__m128d)(v1_old), (double const *)(addr), \
        (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div2di with v1_old (as __v2di), addr (as
     long long const *), index (as __v2di), mask and scale (as int). */
  #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128i)__builtin_ia32_gather3div2di( \
        (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
        (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div4df with v1_old (as __v4df), addr (as
     double const *), index (as __v4di), mask and scale (as int). */
  #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256d)__builtin_ia32_gather3div4df( \
        (__v4df)(__m256d)(v1_old), (double const *)(addr), \
        (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div4di with v1_old (as __v4di), addr (as
     long long const *), index (as __v4di), mask and scale (as int). */
  #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256i)__builtin_ia32_gather3div4di( \
        (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
        (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div4sf with v1_old (as __v4sf), addr (as
     float const *), index (as __v2di), mask and scale (as int). */
  #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128)__builtin_ia32_gather3div4sf( \
        (__v4sf)(__m128)(v1_old), (float const *)(addr), \
        (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div4si with v1_old (as __v4si), addr (as
     int const *), index (as __v2di), mask and scale (as int). */
  #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128i)__builtin_ia32_gather3div4si( \
        (__v4si)(__m128i)(v1_old), (int const *)(addr), \
        (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div8sf with v1_old (as __v4sf), addr (as
     float const *), index (as __v4di), mask and scale (as int); the result is
     cast to __m128. */
  #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128)__builtin_ia32_gather3div8sf( \
        (__v4sf)(__m128)(v1_old), (float const *)(addr), \
        (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3div8si with v1_old (as __v4si), addr (as
     int const *), index (as __v4di), mask and scale (as int); the result is
     cast to __m128i. */
  #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128i)__builtin_ia32_gather3div8si( \
        (__v4si)(__m128i)(v1_old), (int const *)(addr), \
        (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv2df with v1_old (as __v2df), addr (as
     double const *), index (as __v4si), mask and scale (as int). */
  #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128d)__builtin_ia32_gather3siv2df( \
        (__v2df)(__m128d)(v1_old), (double const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv2di with v1_old (as __v2di), addr (as
     long long const *), index (as __v4si), mask and scale (as int). */
  #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128i)__builtin_ia32_gather3siv2di( \
        (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv4df with v1_old (as __v4df), addr (as
     double const *), index (as __v4si), mask and scale (as int). */
  #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256d)__builtin_ia32_gather3siv4df( \
        (__v4df)(__m256d)(v1_old), (double const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv4di with v1_old (as __v4di), addr (as
     long long const *), index (as __v4si), mask and scale (as int). */
  #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256i)__builtin_ia32_gather3siv4di( \
        (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv4sf with v1_old (as __v4sf), addr (as
     float const *), index (as __v4si), mask and scale (as int). */
  #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128)__builtin_ia32_gather3siv4sf( \
        (__v4sf)(__m128)(v1_old), (float const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv4si with v1_old (as __v4si), addr (as
     int const *), index (as __v4si), mask and scale (as int). */
  #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m128i)__builtin_ia32_gather3siv4si( \
        (__v4si)(__m128i)(v1_old), (int const *)(addr), \
        (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv8sf with v1_old (as __v8sf), addr (as
     float const *), index (as __v8si), mask and scale (as int). */
  #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256)__builtin_ia32_gather3siv8sf( \
        (__v8sf)(__m256)(v1_old), (float const *)(addr), \
        (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Expands to __builtin_ia32_gather3siv8si with v1_old (as __v8si), addr (as
     int const *), index (as __v8si), mask and scale (as int). */
  #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
    (__m256i)__builtin_ia32_gather3siv8si( \
        (__v8si)(__m256i)(v1_old), (int const *)(addr), \
        (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
  /* Permute the four doubles of X by the immediate C: result lane i is
   * X[(C >> 2*i) & 3]. The second shuffle operand is an undefined vector;
   * every index is masked to 0..3, so it is never selected. */
  #define _mm256_permutex_pd(X, C) \
    ((__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
                                      (__v4df)_mm256_undefined_pd(), \
                                      ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                      ((C) >> 4) & 0x3, ((C) >> 6) & 0x3))
  /* Merge-masked form of _mm256_permutex_pd: lanes whose bit in U is set take
   * the permuted value, the remaining lanes take W. */
  #define _mm256_mask_permutex_pd(W, U, X, C) \
    ((__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_permutex_pd((X), (C)), \
        (__v4df)(__m256d)(W)))
  /* Zero-masked form of _mm256_permutex_pd: lanes whose bit in U is set take
   * the permuted value, the remaining lanes are zero. */
  #define _mm256_maskz_permutex_pd(U, X, C) \
    ((__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_permutex_pd((X), (C)), \
        (__v4df)_mm256_setzero_pd()))
  /* Permute the four 64-bit integers of X by the immediate C: result lane i
   * is X[(C >> 2*i) & 3]. The second shuffle operand is an undefined vector;
   * every index is masked to 0..3, so it is never selected. */
  #define _mm256_permutex_epi64(X, C) \
    ((__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
                                      (__v4di)_mm256_undefined_si256(), \
                                      ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                      ((C) >> 4) & 0x3, ((C) >> 6) & 0x3))
  /* Merge-masked form of _mm256_permutex_epi64: lanes whose bit in U is set
   * take the permuted value, the remaining lanes take W. */
  #define _mm256_mask_permutex_epi64(W, U, X, C) \
    ((__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_permutex_epi64((X), (C)), \
        (__v4di)(__m256i)(W)))
  /* Zero-masked form of _mm256_permutex_epi64: lanes whose bit in U is set
   * take the permuted value, the remaining lanes are zero. */
  #define _mm256_maskz_permutex_epi64(U, X, C) \
    ((__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_permutex_epi64((X), (C)), \
        (__v4di)_mm256_setzero_si256()))
  7047. static __inline__ __m256d __DEFAULT_FN_ATTRS
  7048. _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
  7049. {
  7050. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  7051. (__v4di) __X,
  7052. (__v4df) _mm256_undefined_si256 (),
  7053. (__mmask8) -1);
  7054. }
  7055. static __inline__ __m256d __DEFAULT_FN_ATTRS
  7056. _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
  7057. __m256d __Y)
  7058. {
  7059. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  7060. (__v4di) __X,
  7061. (__v4df) __W,
  7062. (__mmask8) __U);
  7063. }
  7064. static __inline__ __m256d __DEFAULT_FN_ATTRS
  7065. _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
  7066. {
  7067. return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
  7068. (__v4di) __X,
  7069. (__v4df) _mm256_setzero_pd (),
  7070. (__mmask8) __U);
  7071. }
  7072. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7073. _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
  7074. {
  7075. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  7076. (__v4di) __X,
  7077. (__v4di) _mm256_setzero_si256 (),
  7078. (__mmask8) __M);
  7079. }
  7080. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7081. _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
  7082. {
  7083. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  7084. (__v4di) __X,
  7085. (__v4di) _mm256_undefined_si256 (),
  7086. (__mmask8) -1);
  7087. }
  7088. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7089. _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
  7090. __m256i __Y)
  7091. {
  7092. return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
  7093. (__v4di) __X,
  7094. (__v4di) __W,
  7095. __M);
  7096. }
  7097. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7098. _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
  7099. __m256 __Y)
  7100. {
  7101. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  7102. (__v8si) __X,
  7103. (__v8sf) __W,
  7104. (__mmask8) __U);
  7105. }
  7106. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7107. _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
  7108. {
  7109. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  7110. (__v8si) __X,
  7111. (__v8sf) _mm256_setzero_ps (),
  7112. (__mmask8) __U);
  7113. }
  7114. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7115. _mm256_permutexvar_ps (__m256i __X, __m256 __Y)
  7116. {
  7117. return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
  7118. (__v8si) __X,
  7119. (__v8sf) _mm256_undefined_si256 (),
  7120. (__mmask8) -1);
  7121. }
  7122. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7123. _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
  7124. {
  7125. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  7126. (__v8si) __X,
  7127. (__v8si) _mm256_setzero_si256 (),
  7128. __M);
  7129. }
  7130. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7131. _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
  7132. __m256i __Y)
  7133. {
  7134. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  7135. (__v8si) __X,
  7136. (__v8si) __W,
  7137. (__mmask8) __M);
  7138. }
  7139. static __inline__ __m256i __DEFAULT_FN_ATTRS
  7140. _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
  7141. {
  7142. return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
  7143. (__v8si) __X,
  7144. (__v8si) _mm256_undefined_si256(),
  7145. (__mmask8) -1);
  7146. }
  /* Select four consecutive 32-bit lanes from the eight-lane concatenation
   * of B (lanes 0..3) and A (lanes 4..7), starting at lane (imm & 3). */
  #define _mm_alignr_epi32(A, B, imm) \
    ((__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
                                      (__v4si)(__m128i)(A), \
                                      ((int)(imm) & 0x3) + 0, \
                                      ((int)(imm) & 0x3) + 1, \
                                      ((int)(imm) & 0x3) + 2, \
                                      ((int)(imm) & 0x3) + 3))
  /* Merge-masked form of _mm_alignr_epi32: lanes whose bit in U is set take
   * the aligned value, the remaining lanes take W. */
  #define _mm_mask_alignr_epi32(W, U, A, B, imm) \
    ((__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
        (__v4si)(__m128i)(W)))
  /* Zero-masked form of _mm_alignr_epi32: lanes whose bit in U is set take
   * the aligned value, the remaining lanes are zero. */
  #define _mm_maskz_alignr_epi32(U, A, B, imm) \
    ((__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
        (__v4si)_mm_setzero_si128()))
  /* Select eight consecutive 32-bit lanes from the sixteen-lane concatenation
   * of B (lanes 0..7) and A (lanes 8..15), starting at lane (imm & 7). */
  #define _mm256_alignr_epi32(A, B, imm) \
    ((__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
                                      (__v8si)(__m256i)(A), \
                                      ((int)(imm) & 0x7) + 0, \
                                      ((int)(imm) & 0x7) + 1, \
                                      ((int)(imm) & 0x7) + 2, \
                                      ((int)(imm) & 0x7) + 3, \
                                      ((int)(imm) & 0x7) + 4, \
                                      ((int)(imm) & 0x7) + 5, \
                                      ((int)(imm) & 0x7) + 6, \
                                      ((int)(imm) & 0x7) + 7))
  /* Merge-masked form of _mm256_alignr_epi32: lanes whose bit in U is set
   * take the aligned value, the remaining lanes take W. */
  #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
    ((__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
        (__v8si)(__m256i)(W)))
  /* Zero-masked form of _mm256_alignr_epi32: lanes whose bit in U is set
   * take the aligned value, the remaining lanes are zero. */
  #define _mm256_maskz_alignr_epi32(U, A, B, imm) \
    ((__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
        (__v8si)_mm256_setzero_si256()))
  /* Select two consecutive 64-bit lanes from the four-lane concatenation of
   * B (lanes 0..1) and A (lanes 2..3), starting at lane (imm & 1). */
  #define _mm_alignr_epi64(A, B, imm) \
    ((__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
                                      (__v2di)(__m128i)(A), \
                                      ((int)(imm) & 0x1) + 0, \
                                      ((int)(imm) & 0x1) + 1))
  /* Merge-masked form of _mm_alignr_epi64: lanes whose bit in U is set take
   * the aligned value, the remaining lanes take W. */
  #define _mm_mask_alignr_epi64(W, U, A, B, imm) \
    ((__m128i)__builtin_ia32_selectq_128( \
        (__mmask8)(U), \
        (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
        (__v2di)(__m128i)(W)))
  /* Zero-masked form of _mm_alignr_epi64: lanes whose bit in U is set take
   * the aligned value, the remaining lanes are zero (via _mm_setzero_di). */
  #define _mm_maskz_alignr_epi64(U, A, B, imm) \
    ((__m128i)__builtin_ia32_selectq_128( \
        (__mmask8)(U), \
        (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
        (__v2di)_mm_setzero_di()))
  /* Select four consecutive 64-bit lanes from the eight-lane concatenation
   * of B (lanes 0..3) and A (lanes 4..7), starting at lane (imm & 3). */
  #define _mm256_alignr_epi64(A, B, imm) \
    ((__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
                                      (__v4di)(__m256i)(A), \
                                      ((int)(imm) & 0x3) + 0, \
                                      ((int)(imm) & 0x3) + 1, \
                                      ((int)(imm) & 0x3) + 2, \
                                      ((int)(imm) & 0x3) + 3))
  /* Merge-masked form of _mm256_alignr_epi64: lanes whose bit in U is set
   * take the aligned value, the remaining lanes take W. */
  #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
    ((__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
        (__v4di)(__m256i)(W)))
  /* Zero-masked form of _mm256_alignr_epi64: lanes whose bit in U is set
   * take the aligned value, the remaining lanes are zero. */
  #define _mm256_maskz_alignr_epi64(U, A, B, imm) \
    ((__m256i)__builtin_ia32_selectq_256( \
        (__mmask8)(U), \
        (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
        (__v4di)_mm256_setzero_si256()))
  7209. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7210. _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  7211. {
  7212. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  7213. (__v4sf)_mm_movehdup_ps(__A),
  7214. (__v4sf)__W);
  7215. }
  7216. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7217. _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
  7218. {
  7219. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  7220. (__v4sf)_mm_movehdup_ps(__A),
  7221. (__v4sf)_mm_setzero_ps());
  7222. }
  7223. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7224. _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  7225. {
  7226. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  7227. (__v8sf)_mm256_movehdup_ps(__A),
  7228. (__v8sf)__W);
  7229. }
  7230. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7231. _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
  7232. {
  7233. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  7234. (__v8sf)_mm256_movehdup_ps(__A),
  7235. (__v8sf)_mm256_setzero_ps());
  7236. }
  7237. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7238. _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
  7239. {
  7240. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  7241. (__v4sf)_mm_moveldup_ps(__A),
  7242. (__v4sf)__W);
  7243. }
  7244. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7245. _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
  7246. {
  7247. return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  7248. (__v4sf)_mm_moveldup_ps(__A),
  7249. (__v4sf)_mm_setzero_ps());
  7250. }
  7251. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7252. _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
  7253. {
  7254. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  7255. (__v8sf)_mm256_moveldup_ps(__A),
  7256. (__v8sf)__W);
  7257. }
  7258. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7259. _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
  7260. {
  7261. return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  7262. (__v8sf)_mm256_moveldup_ps(__A),
  7263. (__v8sf)_mm256_setzero_ps());
  7264. }
  /* Merge-masked _mm256_shuffle_epi32: lanes whose bit in U is set take
   * _mm256_shuffle_epi32(A, I), the remaining lanes take W. */
  #define _mm256_mask_shuffle_epi32(W, U, A, I) \
    ((__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_shuffle_epi32((A), (I)), \
        (__v8si)(__m256i)(W)))
  /* Zero-masked _mm256_shuffle_epi32: lanes whose bit in U is set take
   * _mm256_shuffle_epi32(A, I), the remaining lanes are zero. */
  #define _mm256_maskz_shuffle_epi32(U, A, I) \
    ((__m256i)__builtin_ia32_selectd_256( \
        (__mmask8)(U), \
        (__v8si)_mm256_shuffle_epi32((A), (I)), \
        (__v8si)_mm256_setzero_si256()))
  /* Merge-masked _mm_shuffle_epi32: lanes whose bit in U is set take
   * _mm_shuffle_epi32(A, I), the remaining lanes take W. */
  #define _mm_mask_shuffle_epi32(W, U, A, I) \
    ((__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm_shuffle_epi32((A), (I)), \
        (__v4si)(__m128i)(W)))
  /* Zero-masked _mm_shuffle_epi32: lanes whose bit in U is set take
   * _mm_shuffle_epi32(A, I), the remaining lanes are zero. */
  #define _mm_maskz_shuffle_epi32(U, A, I) \
    ((__m128i)__builtin_ia32_selectd_128( \
        (__mmask8)(U), \
        (__v4si)_mm_shuffle_epi32((A), (I)), \
        (__v4si)_mm_setzero_si128()))
  7281. static __inline__ __m128d __DEFAULT_FN_ATTRS
  7282. _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
  7283. {
  7284. return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
  7285. (__v2df) __A,
  7286. (__v2df) __W);
  7287. }
  7288. static __inline__ __m128d __DEFAULT_FN_ATTRS
  7289. _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
  7290. {
  7291. return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
  7292. (__v2df) __A,
  7293. (__v2df) _mm_setzero_pd ());
  7294. }
  7295. static __inline__ __m256d __DEFAULT_FN_ATTRS
  7296. _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
  7297. {
  7298. return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
  7299. (__v4df) __A,
  7300. (__v4df) __W);
  7301. }
  7302. static __inline__ __m256d __DEFAULT_FN_ATTRS
  7303. _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
  7304. {
  7305. return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
  7306. (__v4df) __A,
  7307. (__v4df) _mm256_setzero_pd ());
  7308. }
  7309. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7310. _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
  7311. {
  7312. return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
  7313. (__v4sf) __A,
  7314. (__v4sf) __W);
  7315. }
  7316. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7317. _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
  7318. {
  7319. return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
  7320. (__v4sf) __A,
  7321. (__v4sf) _mm_setzero_ps ());
  7322. }
  7323. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7324. _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
  7325. {
  7326. return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
  7327. (__v8sf) __A,
  7328. (__v8sf) __W);
  7329. }
  7330. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7331. _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
  7332. {
  7333. return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
  7334. (__v8sf) __A,
  7335. (__v8sf) _mm256_setzero_ps ());
  7336. }
  7337. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7338. _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
  7339. {
  7340. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7341. (__v4sf) __W,
  7342. (__mmask8) __U);
  7343. }
  7344. static __inline__ __m128 __DEFAULT_FN_ATTRS
  7345. _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7346. {
  7347. return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
  7348. (__v4sf)
  7349. _mm_setzero_ps (),
  7350. (__mmask8) __U);
  7351. }
  7352. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7353. _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
  7354. {
  7355. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7356. (__v8sf) __W,
  7357. (__mmask8) __U);
  7358. }
  7359. static __inline__ __m256 __DEFAULT_FN_ATTRS
  7360. _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
  7361. {
  7362. return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
  7363. (__v8sf)
  7364. _mm256_setzero_ps (),
  7365. (__mmask8) __U);
  7366. }
  7367. static __inline __m128i __DEFAULT_FN_ATTRS
  7368. _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
  7369. {
  7370. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
  7371. (__v8hi) __W,
  7372. (__mmask8) __U);
  7373. }
  7374. static __inline __m128i __DEFAULT_FN_ATTRS
  7375. _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
  7376. {
  7377. return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
  7378. (__v8hi) _mm_setzero_si128 (),
  7379. (__mmask8) __U);
  7380. }
  /* Merge-masked single-to-half conversion of a 128-bit float vector with a
   * caller-supplied rounding immediate I; W is the pass-through operand and
   * U the write mask. */
  #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
    ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)))
  /* Zero-masked single-to-half conversion of a 128-bit float vector with a
   * caller-supplied rounding immediate I; the pass-through operand is zero. */
  #define _mm_maskz_cvt_roundps_ph(U, A, I) \
    ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)))
  7389. static __inline __m128i __DEFAULT_FN_ATTRS
  7390. _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
  7391. {
  7392. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
  7393. (__v8hi) __W,
  7394. (__mmask8) __U);
  7395. }
  7396. static __inline __m128i __DEFAULT_FN_ATTRS
  7397. _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
  7398. {
  7399. return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
  7400. (__v8hi) _mm_setzero_si128(),
  7401. (__mmask8) __U);
  7402. }
  /* Merge-masked single-to-half conversion of a 256-bit float vector with a
   * caller-supplied rounding immediate I; W is the pass-through operand and
   * U the write mask. */
  #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
    ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                               (__v8hi)(__m128i)(W), \
                                               (__mmask8)(U)))
  /* Zero-masked single-to-half conversion of a 256-bit float vector with a
   * caller-supplied rounding immediate I; the pass-through operand is zero. */
  #define _mm256_maskz_cvt_roundps_ph(U, A, I) \
    ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                               (__v8hi)_mm_setzero_si128(), \
                                               (__mmask8)(U)))
  7411. #undef __DEFAULT_FN_ATTRS
  7412. #endif /* __AVX512VLINTRIN_H */