avx512vlvnniintrin.h 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
  2. *
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. * THE SOFTWARE.
  21. *
  22. *===-----------------------------------------------------------------------===
  23. */
  /* Refuse to compile unless __IMMINTRIN_H is already defined; per the error
   * text below, this file is meant to be reached only through <immintrin.h>
   * (presumably the header that defines __IMMINTRIN_H -- confirm there). */
  24. #ifndef __IMMINTRIN_H
  25. #error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
  26. #endif
  /* Include guard for this header; its matching #endif is the last line of
   * the file. */
  27. #ifndef __AVX512VLVNNIINTRIN_H
  28. #define __AVX512VLVNNIINTRIN_H
  29. /* Define the default attributes for the functions in this file. */
  /* The attribute list requests always_inline and nodebug, and sets the
   * function target string to "avx512vl,avx512vnni". The macro is removed
   * again by the #undef at the end of this file. */
  30. #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
  31. static __inline__ __m256i __DEFAULT_FN_ATTRS
  32. _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
  33. {
  34. return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
  35. (__v8si) __A,
  36. (__v8si) __B,
  37. (__mmask8) __U);
  38. }
  39. static __inline__ __m256i __DEFAULT_FN_ATTRS
  40. _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
  41. {
  42. return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
  43. (__v8si) __A,
  44. (__v8si) __B,
  45. (__mmask8) __U);
  46. }
  47. static __inline__ __m256i __DEFAULT_FN_ATTRS
  48. _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
  49. {
  50. return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
  51. (__v8si) __A,
  52. (__v8si) __B,
  53. (__mmask8) -1);
  54. }
  55. static __inline__ __m256i __DEFAULT_FN_ATTRS
  56. _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
  57. {
  58. return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
  59. (__v8si) __A,
  60. (__v8si) __B,
  61. (__mmask8) __U);
  62. }
  63. static __inline__ __m256i __DEFAULT_FN_ATTRS
  64. _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
  65. {
  66. return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
  67. (__v8si) __A,
  68. (__v8si) __B,
  69. (__mmask8) __U);
  70. }
  71. static __inline__ __m256i __DEFAULT_FN_ATTRS
  72. _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
  73. {
  74. return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
  75. (__v8si) __A,
  76. (__v8si) __B,
  77. (__mmask8) -1);
  78. }
  79. static __inline__ __m256i __DEFAULT_FN_ATTRS
  80. _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
  81. {
  82. return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
  83. (__v8si) __A,
  84. (__v8si) __B,
  85. (__mmask8) __U);
  86. }
  87. static __inline__ __m256i __DEFAULT_FN_ATTRS
  88. _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
  89. {
  90. return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
  91. (__v8si) __A,
  92. (__v8si) __B,
  93. (__mmask8) __U);
  94. }
  95. static __inline__ __m256i __DEFAULT_FN_ATTRS
  96. _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
  97. {
  98. return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
  99. (__v8si) __A,
  100. (__v8si) __B,
  101. (__mmask8) -1);
  102. }
  103. static __inline__ __m256i __DEFAULT_FN_ATTRS
  104. _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
  105. {
  106. return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
  107. (__v8si) __A,
  108. (__v8si) __B,
  109. (__mmask8) __U);
  110. }
  111. static __inline__ __m256i __DEFAULT_FN_ATTRS
  112. _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
  113. {
  114. return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
  115. (__v8si) __A,
  116. (__v8si) __B,
  117. (__mmask8) __U);
  118. }
  119. static __inline__ __m256i __DEFAULT_FN_ATTRS
  120. _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
  121. {
  122. return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
  123. (__v8si) __A,
  124. (__v8si) __B,
  125. (__mmask8) -1);
  126. }
  127. static __inline__ __m128i __DEFAULT_FN_ATTRS
  128. _mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
  129. {
  130. return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
  131. (__v4si) __A,
  132. (__v4si) __B,
  133. (__mmask8) __U);
  134. }
  135. static __inline__ __m128i __DEFAULT_FN_ATTRS
  136. _mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
  137. {
  138. return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
  139. (__v4si) __A,
  140. (__v4si) __B,
  141. (__mmask8) __U);
  142. }
  143. static __inline__ __m128i __DEFAULT_FN_ATTRS
  144. _mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
  145. {
  146. return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
  147. (__v4si) __A,
  148. (__v4si) __B,
  149. (__mmask8) -1);
  150. }
  151. static __inline__ __m128i __DEFAULT_FN_ATTRS
  152. _mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
  153. {
  154. return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
  155. (__v4si) __A,
  156. (__v4si) __B,
  157. (__mmask8) __U);
  158. }
  159. static __inline__ __m128i __DEFAULT_FN_ATTRS
  160. _mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
  161. {
  162. return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
  163. (__v4si) __A,
  164. (__v4si) __B,
  165. (__mmask8) __U);
  166. }
  167. static __inline__ __m128i __DEFAULT_FN_ATTRS
  168. _mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
  169. {
  170. return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
  171. (__v4si) __A,
  172. (__v4si) __B,
  173. (__mmask8) -1);
  174. }
  175. static __inline__ __m128i __DEFAULT_FN_ATTRS
  176. _mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
  177. {
  178. return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
  179. (__v4si) __A,
  180. (__v4si) __B,
  181. (__mmask8) __U);
  182. }
  183. static __inline__ __m128i __DEFAULT_FN_ATTRS
  184. _mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
  185. {
  186. return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
  187. (__v4si) __A,
  188. (__v4si) __B,
  189. (__mmask8) __U);
  190. }
  191. static __inline__ __m128i __DEFAULT_FN_ATTRS
  192. _mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
  193. {
  194. return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
  195. (__v4si) __A,
  196. (__v4si) __B,
  197. (__mmask8) -1);
  198. }
  199. static __inline__ __m128i __DEFAULT_FN_ATTRS
  200. _mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
  201. {
  202. return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
  203. (__v4si) __A,
  204. (__v4si) __B,
  205. (__mmask8) __U);
  206. }
  207. static __inline__ __m128i __DEFAULT_FN_ATTRS
  208. _mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
  209. {
  210. return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
  211. (__v4si) __A,
  212. (__v4si) __B,
  213. (__mmask8) __U);
  214. }
  215. static __inline__ __m128i __DEFAULT_FN_ATTRS
  216. _mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
  217. {
  218. return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
  219. (__v4si) __A,
  220. (__v4si) __B,
  221. (__mmask8) -1);
  222. }
  /* Remove the attribute macro defined near the top of this file so it is no
   * longer defined after this point. */
  223. #undef __DEFAULT_FN_ATTRS
  224. #endif /* __AVX512VLVNNIINTRIN_H */