/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __VBMIVLINTRIN_H
#define __VBMIVLINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))

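/*
 * Two-source byte permutes (VPERMI2B / VPERMT2B).
 *
 * Each byte of the index vector __I selects one byte from the table formed by
 * concatenating __A and __B: indices 0..15 pick from __A and 16..31 from __B
 * in the 128-bit forms (low 5 index bits used), 0..31 from __A and 32..63
 * from __B in the 256-bit forms (low 6 bits used).  The "_mask2" variants
 * leave masked-off lanes equal to the corresponding byte of __I, the "_mask"
 * variants keep __A, and the "_maskz" variants zero them.
 */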
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermi2varqi128_mask (
      (__v16qi) __A, (__v16qi) __I /* idx */, (__v16qi) __B, (__mmask16) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, __mmask32 __U,
                                __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermi2varqi256_mask (
      (__v32qi) __A, (__v32qi) __I /* idx */, (__v32qi) __B, (__mmask32) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask (
      (__v16qi) __I /* idx */, (__v16qi) __A, (__v16qi) __B, (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
                            __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask (
      (__v16qi) __I /* idx */, (__v16qi) __A, (__v16qi) __B, (__mmask16) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
                             __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_maskz (
      (__v16qi) __I /* idx */, (__v16qi) __A, (__v16qi) __B, (__mmask16) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask (
      (__v32qi) __I /* idx */, (__v32qi) __A, (__v32qi) __B, (__mmask32) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, __m256i __I,
                               __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask (
      (__v32qi) __I /* idx */, (__v32qi) __A, (__v32qi) __B, (__mmask32) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, __m256i __I,
                                __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_maskz (
      (__v32qi) __I /* idx */, (__v32qi) __A, (__v32qi) __B, (__mmask32) __U);
}

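/*
 * Single-source byte permutes (VPERMB).
 *
 * Following the Intel intrinsic signatures, __A holds the byte indices and
 * __B holds the data: result lane i is __B[__A[i] & 0xf] in the 128-bit
 * forms and __B[__A[i] & 0x1f] in the 256-bit forms.  Unlike PSHUFB, the
 * shuffle crosses 128-bit lane boundaries.
 */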
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask (
      (__v16qi) __B, (__v16qi) __A, (__v16qi) _mm_undefined_si128 (),
      (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask (
      (__v16qi) __B, (__v16qi) __A, (__v16qi) _mm_setzero_si128 (),
      (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
                           __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask (
      (__v16qi) __B, (__v16qi) __A, (__v16qi) __W, (__mmask16) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask (
      (__v32qi) __B, (__v32qi) __A, (__v32qi) _mm256_undefined_si256 (),
      (__mmask32) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask (
      (__v32qi) __B, (__v32qi) __A, (__v32qi) _mm256_setzero_si256 (),
      (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
                              __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask (
      (__v32qi) __B, (__v32qi) __A, (__v32qi) __W, (__mmask32) __M);
}

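/*
 * Byte-granular multishift (VPMULTISHIFTQB).
 *
 * For each byte position within every 64-bit element, the corresponding
 * control byte of __X supplies a bit offset (taken modulo 64); the result
 * byte is the unaligned 8-bit field of the matching 64-bit element of __Y
 * starting at that offset, wrapping around within the quadword.  A typical
 * use is unpacking densely packed sub-byte fields such as 6-bit codes.
 */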
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X,
                                __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask (
      (__v16qi) __X, (__v16qi) __Y, (__v16qi) __W, (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask (
      (__v16qi) __X, (__v16qi) __Y, (__v16qi) _mm_setzero_si128 (),
      (__mmask16) __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask (
      (__v16qi) __X, (__v16qi) __Y, (__v16qi) _mm_undefined_si128 (),
      (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X,
                                   __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask (
      (__v32qi) __X, (__v32qi) __Y, (__v32qi) __W, (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask (
      (__v32qi) __X, (__v32qi) __Y, (__v32qi) _mm256_setzero_si256 (),
      (__mmask32) __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask (
      (__v32qi) __X, (__v32qi) __Y, (__v32qi) _mm256_undefined_si256 (),
      (__mmask32) -1);
}

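/*
 * Illustrative usage sketch (not part of the upstream header): reverse all 32
 * bytes of a vector with one cross-lane VPERMB via _mm256_permutexvar_epi8;
 * note that the index operand comes first and the data operand second.  The
 * helper name reverse_bytes_256 is invented for this example; build with
 * AVX512VBMI and AVX512VL enabled (e.g. -mavx512vbmi -mavx512vl).
 *
 *   #include <immintrin.h>
 *
 *   static inline __m256i reverse_bytes_256 (__m256i v)
 *   {
 *     const __m256i idx = _mm256_setr_epi8 (
 *         31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
 *         15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0);
 *     return _mm256_permutexvar_epi8 (idx, v);
 *   }
 */
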
#undef __DEFAULT_FN_ATTRS

#endif