/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
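
/* Usage sketch (illustrative only, not part of the original header). VPMULLQ
 * multiplies 64-bit lanes and keeps the low 64 bits of each product; _mask_
 * variants copy lanes from __W where the mask bit is clear, while _maskz_
 * variants zero them:
 *
 *   __m512i a = _mm512_set1_epi64(3);
 *   __m512i b = _mm512_set1_epi64(-5);
 *   __m512i p = _mm512_mullo_epi64(a, b);               // all lanes == -15
 *   __m512i q = _mm512_mask_mullo_epi64(a, 0x0F, a, b); // lanes 0-3 == -15, lanes 4-7 == 3
 *   __m512i r = _mm512_maskz_mullo_epi64(0xF0, a, b);   // lanes 0-3 == 0, lanes 4-7 == -15
 */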
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
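
/* Usage sketch (illustrative only, not part of the original header). These
 * operate bitwise on the floating-point bit patterns, and
 * _mm512_andnot_pd(A, B) computes (~A) & B, so ANDNOT with a sign mask is the
 * classic absolute-value idiom:
 *
 *   __m512d x     = _mm512_set1_pd(-2.5);
 *   __m512d smask = _mm512_set1_pd(-0.0);       // only the sign bits set
 *   __m512d neg   = _mm512_xor_pd(x, smask);    // sign flipped: 2.5
 *   __m512d absx  = _mm512_andnot_pd(smask, x); // sign cleared: 2.5
 */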
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
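
/* Usage sketch (illustrative only, not part of the original header). The
 * inline functions convert using MXCSR's current rounding direction; the
 * _cvt_round* macros take an explicit rounding mode instead:
 *
 *   __m512d v = _mm512_set1_pd(2.5);
 *   __m512i n = _mm512_cvt_roundpd_epi64(v, _MM_FROUND_TO_NEG_INF |
 *                                           _MM_FROUND_NO_EXC); // lanes == 2
 */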
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) _mm512_setzero_pd(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
                                                    (__v8df) _mm512_setzero_pd(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
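
/* Usage sketch (illustrative only, not part of the original header). Note the
 * asymmetric widths: eight 64-bit integers widen to a full __m512d of doubles
 * but narrow to a half-width __m256 of floats:
 *
 *   __m512i q  = _mm512_set1_epi64(42);
 *   __m512d pd = _mm512_cvtepi64_pd(q); // eight doubles, each 42.0
 *   __m256  ps = _mm512_cvtepi64_ps(q); // eight floats, each 42.0f
 */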
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
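
/* Usage sketch (illustrative only, not part of the original header). The
 * cvtt* forms always truncate toward zero regardless of MXCSR; in the
 * _cvtt_round* macros the rounding argument only usefully adds
 * _MM_FROUND_NO_EXC to suppress exceptions:
 *
 *   __m512d v = _mm512_set1_pd(-2.9);
 *   __m512i t = _mm512_cvttpd_epi64(v);                          // lanes == -2
 *   __m512i s = _mm512_cvtt_roundpd_epi64(v, _MM_FROUND_NO_EXC); // same, no FP flags
 */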
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) _mm512_setzero_pd(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
                                                     (__v8df) _mm512_setzero_pd(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R)); })

#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })

#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(C), \
                                               (int)(R)); })

#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })

#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })

#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)); })

#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
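
/* Usage sketch (illustrative only; the imm8 bit meanings below follow the
 * VRANGEPD/VRANGEPS description in Intel's documentation and should be
 * treated as an assumption). imm8[1:0] selects the operation
 * (00 min, 01 max, 10 absolute min, 11 absolute max) and imm8[3:2] the sign
 * source (00 sign of A, 01 sign of the comparison result, 10 clear, 11 set):
 *
 *   __m512d a = _mm512_set1_pd(-3.0), b = _mm512_set1_pd(2.0);
 *   __m512d m = _mm512_range_pd(a, b, 0x0B); // 1011b: abs max, sign cleared -> 3.0
 */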
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })

#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R)); })
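
/* Usage sketch (illustrative only, not part of the original header). Despite
 * the name, these are not horizontal reductions: VREDUCE subtracts the
 * rounded integer part, leaving the reduced fraction
 * dst = a - Round(2^M * a) * 2^-M, where imm8[7:4] holds M and imm8[1:0] the
 * rounding control (encoding per Intel's documentation; treat the bit layout
 * as an assumption):
 *
 *   __m512d x = _mm512_set1_pd(1.75);
 *   __m512d f = _mm512_reduce_pd(x, 0); // M=0, round-to-nearest: 1.75 - 2.0 == -0.25
 */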
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
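
/* Usage sketch (illustrative only, not part of the original header).
 * movepi*_mask packs each lane's most significant (sign) bit into a mask;
 * movm_epi* expands each mask bit back to an all-ones or all-zeros lane:
 *
 *   __m512i v   = _mm512_set1_epi32(-1);
 *   __mmask16 m = _mm512_movepi32_mask(v); // 0xFFFF: every MSB is set
 *   __m512i w   = _mm512_movm_epi32(m);    // every lane 0xFFFFFFFF
 */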
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
}
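
/* Usage sketch (illustrative only, not part of the original header). The
 * NxM broadcasts repeat an N-bit-lane, M-element tile across the 512-bit
 * result; f64x2, for example, repeats a 128-bit pair four times:
 *
 *   __m128d pair = _mm_set_pd(2.0, 1.0);         // {1.0, 2.0}
 *   __m512d rep  = _mm512_broadcast_f64x2(pair); // {1,2,1,2,1,2,1,2}
 */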
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  ((imm) & 1) ?  8 : 0, \
                                  ((imm) & 1) ?  9 : 1, \
                                  ((imm) & 1) ? 10 : 2, \
                                  ((imm) & 1) ? 11 : 3, \
                                  ((imm) & 1) ? 12 : 4, \
                                  ((imm) & 1) ? 13 : 5, \
                                  ((imm) & 1) ? 14 : 6, \
                                  ((imm) & 1) ? 15 : 7); })

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)(W)); })

#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })

#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)(W)); })

#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ?  8 : 0, \
                                   ((imm) & 1) ?  9 : 1, \
                                   ((imm) & 1) ? 10 : 2, \
                                   ((imm) & 1) ? 11 : 3, \
                                   ((imm) & 1) ? 12 : 4, \
                                   ((imm) & 1) ? 13 : 5, \
                                   ((imm) & 1) ? 14 : 6, \
                                   ((imm) & 1) ? 15 : 7); })

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)(W)); })

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)(W)); })
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)_mm_setzero_si128()); })
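
/* Usage sketch (illustrative only, not part of the original header). The imm
 * selects which 128-bit (x2) or 256-bit (x8) slice to pull out:
 *
 *   __m512i v  = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
 *   __m128i lo = _mm512_extracti64x2_epi64(v, 0); // {0, 1}
 *   __m128i hi = _mm512_extracti64x2_epi64(v, 3); // {6, 7}
 */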
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps256_ps512((__m256)(B)), \
                                  ((imm) & 0x1) ?  0 : 16, \
                                  ((imm) & 0x1) ?  1 : 17, \
                                  ((imm) & 0x1) ?  2 : 18, \
                                  ((imm) & 0x1) ?  3 : 19, \
                                  ((imm) & 0x1) ?  4 : 20, \
                                  ((imm) & 0x1) ?  5 : 21, \
                                  ((imm) & 0x1) ?  6 : 22, \
                                  ((imm) & 0x1) ?  7 : 23, \
                                  ((imm) & 0x1) ? 16 :  8, \
                                  ((imm) & 0x1) ? 17 :  9, \
                                  ((imm) & 0x1) ? 18 : 10, \
                                  ((imm) & 0x1) ? 19 : 11, \
                                  ((imm) & 0x1) ? 20 : 12, \
                                  ((imm) & 0x1) ? 21 : 13, \
                                  ((imm) & 0x1) ? 22 : 14, \
                                  ((imm) & 0x1) ? 23 : 15); })

#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)(W)); })

#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })

#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_castpd128_pd512((__m128d)(B)), \
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)(W)); })

#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })

#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_castsi256_si512((__m256i)(B)), \
                                   ((imm) & 0x1) ?  0 : 16, \
                                   ((imm) & 0x1) ?  1 : 17, \
                                   ((imm) & 0x1) ?  2 : 18, \
                                   ((imm) & 0x1) ?  3 : 19, \
                                   ((imm) & 0x1) ?  4 : 20, \
                                   ((imm) & 0x1) ?  5 : 21, \
                                   ((imm) & 0x1) ?  6 : 22, \
                                   ((imm) & 0x1) ?  7 : 23, \
                                   ((imm) & 0x1) ? 16 :  8, \
                                   ((imm) & 0x1) ? 17 :  9, \
                                   ((imm) & 0x1) ? 18 : 10, \
                                   ((imm) & 0x1) ? 19 : 11, \
                                   ((imm) & 0x1) ? 20 : 12, \
                                   ((imm) & 0x1) ? 21 : 13, \
                                   ((imm) & 0x1) ? 22 : 14, \
                                   ((imm) & 0x1) ? 23 : 15); })

#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)(W)); })

#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })

#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_castsi128_si512((__m128i)(B)), \
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)(W)); })

#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
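
/* Usage sketch (illustrative only, not part of the original header). Insert
 * replaces the selected slice of A with B and keeps the rest of A:
 *
 *   __m512i a = _mm512_setzero_si512();
 *   __m128i b = _mm_set_epi64x(9, 8);        // {8, 9}
 *   __m512i r = _mm512_inserti64x2(a, b, 1); // elements 2-3 become 8, 9
 */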
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U)); })

#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1); })

#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U)); })

#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U)); })
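
/* Usage sketch (illustrative only; the category encoding below follows
 * Intel's VFPCLASS documentation and should be treated as an assumption):
 * imm8 is a bitmask of categories, 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +inf,
 * 0x10 -inf, 0x20 denormal, 0x40 finite negative, 0x80 SNaN. A lane's mask
 * bit is set when it matches any selected category:
 *
 *   __m512 v = _mm512_set1_ps(-0.0f);
 *   __mmask16 z = _mm512_fpclass_ps_mask(v, 0x02 | 0x04); // 0xFFFF: lanes are -0
 */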
#undef __DEFAULT_FN_ATTRS

#endif