avx512vlcdintrin.h 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------===
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to deal
  5. * in the Software without restriction, including without limitation the rights
  6. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. * copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. * THE SOFTWARE.
  20. *
  21. *===-----------------------------------------------------------------------===
  22. */
  23. #ifndef __IMMINTRIN_H
  24. #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
  25. #endif
  26. #ifndef __AVX512VLCDINTRIN_H
  27. #define __AVX512VLCDINTRIN_H
  28. /* Define the default attributes for the functions in this file. */
  29. #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
  30. static __inline__ __m128i __DEFAULT_FN_ATTRS
  31. _mm_broadcastmb_epi64 (__mmask8 __A)
  32. {
  33. return (__m128i) _mm_set1_epi64x((long long) __A);
  34. }
  35. static __inline__ __m256i __DEFAULT_FN_ATTRS
  36. _mm256_broadcastmb_epi64 (__mmask8 __A)
  37. {
  38. return (__m256i) _mm256_set1_epi64x((long long)__A);
  39. }
  40. static __inline__ __m128i __DEFAULT_FN_ATTRS
  41. _mm_broadcastmw_epi32 (__mmask16 __A)
  42. {
  43. return (__m128i) _mm_set1_epi32((int)__A);
  44. }
  45. static __inline__ __m256i __DEFAULT_FN_ATTRS
  46. _mm256_broadcastmw_epi32 (__mmask16 __A)
  47. {
  48. return (__m256i) _mm256_set1_epi32((int)__A);
  49. }
  50. static __inline__ __m128i __DEFAULT_FN_ATTRS
  51. _mm_conflict_epi64 (__m128i __A)
  52. {
  53. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  54. (__v2di) _mm_undefined_si128 (),
  55. (__mmask8) -1);
  56. }
  57. static __inline__ __m128i __DEFAULT_FN_ATTRS
  58. _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  59. {
  60. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  61. (__v2di) __W,
  62. (__mmask8) __U);
  63. }
  64. static __inline__ __m128i __DEFAULT_FN_ATTRS
  65. _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
  66. {
  67. return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
  68. (__v2di)
  69. _mm_setzero_di (),
  70. (__mmask8) __U);
  71. }
  72. static __inline__ __m256i __DEFAULT_FN_ATTRS
  73. _mm256_conflict_epi64 (__m256i __A)
  74. {
  75. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  76. (__v4di) _mm256_undefined_si256 (),
  77. (__mmask8) -1);
  78. }
  79. static __inline__ __m256i __DEFAULT_FN_ATTRS
  80. _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  81. {
  82. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  83. (__v4di) __W,
  84. (__mmask8) __U);
  85. }
  86. static __inline__ __m256i __DEFAULT_FN_ATTRS
  87. _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
  88. {
  89. return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
  90. (__v4di) _mm256_setzero_si256 (),
  91. (__mmask8) __U);
  92. }
  93. static __inline__ __m128i __DEFAULT_FN_ATTRS
  94. _mm_conflict_epi32 (__m128i __A)
  95. {
  96. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  97. (__v4si) _mm_undefined_si128 (),
  98. (__mmask8) -1);
  99. }
  100. static __inline__ __m128i __DEFAULT_FN_ATTRS
  101. _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  102. {
  103. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  104. (__v4si) __W,
  105. (__mmask8) __U);
  106. }
  107. static __inline__ __m128i __DEFAULT_FN_ATTRS
  108. _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
  109. {
  110. return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
  111. (__v4si) _mm_setzero_si128 (),
  112. (__mmask8) __U);
  113. }
  114. static __inline__ __m256i __DEFAULT_FN_ATTRS
  115. _mm256_conflict_epi32 (__m256i __A)
  116. {
  117. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  118. (__v8si) _mm256_undefined_si256 (),
  119. (__mmask8) -1);
  120. }
  121. static __inline__ __m256i __DEFAULT_FN_ATTRS
  122. _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  123. {
  124. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  125. (__v8si) __W,
  126. (__mmask8) __U);
  127. }
  128. static __inline__ __m256i __DEFAULT_FN_ATTRS
  129. _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
  130. {
  131. return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
  132. (__v8si)
  133. _mm256_setzero_si256 (),
  134. (__mmask8) __U);
  135. }
  136. static __inline__ __m128i __DEFAULT_FN_ATTRS
  137. _mm_lzcnt_epi32 (__m128i __A)
  138. {
  139. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  140. (__v4si)
  141. _mm_setzero_si128 (),
  142. (__mmask8) -1);
  143. }
  144. static __inline__ __m128i __DEFAULT_FN_ATTRS
  145. _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
  146. {
  147. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  148. (__v4si) __W,
  149. (__mmask8) __U);
  150. }
  151. static __inline__ __m128i __DEFAULT_FN_ATTRS
  152. _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
  153. {
  154. return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
  155. (__v4si)
  156. _mm_setzero_si128 (),
  157. (__mmask8) __U);
  158. }
  159. static __inline__ __m256i __DEFAULT_FN_ATTRS
  160. _mm256_lzcnt_epi32 (__m256i __A)
  161. {
  162. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  163. (__v8si)
  164. _mm256_setzero_si256 (),
  165. (__mmask8) -1);
  166. }
  167. static __inline__ __m256i __DEFAULT_FN_ATTRS
  168. _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
  169. {
  170. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  171. (__v8si) __W,
  172. (__mmask8) __U);
  173. }
  174. static __inline__ __m256i __DEFAULT_FN_ATTRS
  175. _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
  176. {
  177. return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
  178. (__v8si)
  179. _mm256_setzero_si256 (),
  180. (__mmask8) __U);
  181. }
  182. static __inline__ __m128i __DEFAULT_FN_ATTRS
  183. _mm_lzcnt_epi64 (__m128i __A)
  184. {
  185. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  186. (__v2di)
  187. _mm_setzero_di (),
  188. (__mmask8) -1);
  189. }
  190. static __inline__ __m128i __DEFAULT_FN_ATTRS
  191. _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
  192. {
  193. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  194. (__v2di) __W,
  195. (__mmask8) __U);
  196. }
  197. static __inline__ __m128i __DEFAULT_FN_ATTRS
  198. _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
  199. {
  200. return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
  201. (__v2di)
  202. _mm_setzero_di (),
  203. (__mmask8) __U);
  204. }
  205. static __inline__ __m256i __DEFAULT_FN_ATTRS
  206. _mm256_lzcnt_epi64 (__m256i __A)
  207. {
  208. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  209. (__v4di)
  210. _mm256_setzero_si256 (),
  211. (__mmask8) -1);
  212. }
  213. static __inline__ __m256i __DEFAULT_FN_ATTRS
  214. _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
  215. {
  216. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  217. (__v4di) __W,
  218. (__mmask8) __U);
  219. }
  220. static __inline__ __m256i __DEFAULT_FN_ATTRS
  221. _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
  222. {
  223. return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
  224. (__v4di)
  225. _mm256_setzero_si256 (),
  226. (__mmask8) __U);
  227. }
  228. #undef __DEFAULT_FN_ATTRS
  229. #endif /* __AVX512VLCDINTRIN_H */