immintrin.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /*===---- immintrin.h - Intel intrinsics -----------------------------------===
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to deal
  5. * in the Software without restriction, including without limitation the rights
  6. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. * copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. * THE SOFTWARE.
  20. *
  21. *===-----------------------------------------------------------------------===
  22. */
  23. #ifndef __IMMINTRIN_H
  24. #define __IMMINTRIN_H
  25. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
  26. #include <mmintrin.h>
  27. #endif
  28. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
  29. #include <xmmintrin.h>
  30. #endif
  31. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
  32. #include <emmintrin.h>
  33. #endif
  34. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
  35. #include <pmmintrin.h>
  36. #endif
  37. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
  38. #include <tmmintrin.h>
  39. #endif
  40. #if !defined(_MSC_VER) || __has_feature(modules) || \
  41. (defined(__SSE4_2__) || defined(__SSE4_1__))
  42. #include <smmintrin.h>
  43. #endif
  44. #if !defined(_MSC_VER) || __has_feature(modules) || \
  45. (defined(__AES__) || defined(__PCLMUL__))
  46. #include <wmmintrin.h>
  47. #endif
  48. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
  49. #include <clflushoptintrin.h>
  50. #endif
  51. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
  52. #include <clwbintrin.h>
  53. #endif
  54. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
  55. #include <avxintrin.h>
  56. #endif
  57. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
  58. #include <avx2intrin.h>
  59. /* The 256-bit versions of functions in f16cintrin.h.
  60. Intel documents these as being in immintrin.h, and
  61. they depend on typedefs from avxintrin.h. */
  62. /// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
  63. /// containing 16-bit half-precision float values.
  64. ///
  65. /// \headerfile <x86intrin.h>
  66. ///
  67. /// \code
  68. /// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
  69. /// \endcode
  70. ///
  71. /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
  72. ///
  73. /// \param a
  74. /// A 256-bit vector containing 32-bit single-precision float values to be
  75. /// converted to 16-bit half-precision float values.
  76. /// \param imm
  77. /// An immediate value controlling rounding using bits [2:0]: \n
  78. /// 000: Nearest \n
  79. /// 001: Down \n
  80. /// 010: Up \n
  81. /// 011: Truncate \n
  82. /// 1XX: Use MXCSR.RC for rounding
  83. /// \returns A 128-bit vector containing the converted 16-bit half-precision
  84. /// float values.
  #define _mm256_cvtps_ph(a, imm) \
    ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
  87. /// \brief Converts a 128-bit vector containing 16-bit half-precision float
  88. /// values into a 256-bit vector of [8 x float].
  89. ///
  90. /// \headerfile <x86intrin.h>
  91. ///
  92. /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
  93. ///
  94. /// \param __a
  95. /// A 128-bit vector containing 16-bit half-precision float values to be
  96. /// converted to 32-bit single-precision float values.
  97. /// \returns A vector of [8 x float] containing the converted 32-bit
  98. /// single-precision float values.
  99. static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
  100. _mm256_cvtph_ps(__m128i __a)
  101. {
  102. return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
  103. }
  104. #endif /* __AVX2__ */
  105. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
  106. #include <vpclmulqdqintrin.h>
  107. #endif
  108. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
  109. #include <bmiintrin.h>
  110. #endif
  111. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
  112. #include <bmi2intrin.h>
  113. #endif
  114. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
  115. #include <lzcntintrin.h>
  116. #endif
  117. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
  118. #include <fmaintrin.h>
  119. #endif
  120. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
  121. #include <avx512fintrin.h>
  122. #endif
  123. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
  124. #include <avx512vlintrin.h>
  125. #endif
  126. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
  127. #include <avx512bwintrin.h>
  128. #endif
  129. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
  130. #include <avx512bitalgintrin.h>
  131. #endif
  132. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
  133. #include <avx512cdintrin.h>
  134. #endif
  135. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
  136. #include <avx512vpopcntdqintrin.h>
  137. #endif
  138. #if !defined(_MSC_VER) || __has_feature(modules) || \
  139. (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
  140. #include <avx512vpopcntdqvlintrin.h>
  141. #endif
  142. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
  143. #include <avx512vnniintrin.h>
  144. #endif
  145. #if !defined(_MSC_VER) || __has_feature(modules) || \
  146. (defined(__AVX512VL__) && defined(__AVX512VNNI__))
  147. #include <avx512vlvnniintrin.h>
  148. #endif
  149. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
  150. #include <avx512dqintrin.h>
  151. #endif
  152. #if !defined(_MSC_VER) || __has_feature(modules) || \
  153. (defined(__AVX512VL__) && defined(__AVX512BITALG__))
  154. #include <avx512vlbitalgintrin.h>
  155. #endif
  156. #if !defined(_MSC_VER) || __has_feature(modules) || \
  157. (defined(__AVX512VL__) && defined(__AVX512BW__))
  158. #include <avx512vlbwintrin.h>
  159. #endif
  160. #if !defined(_MSC_VER) || __has_feature(modules) || \
  161. (defined(__AVX512VL__) && defined(__AVX512CD__))
  162. #include <avx512vlcdintrin.h>
  163. #endif
  164. #if !defined(_MSC_VER) || __has_feature(modules) || \
  165. (defined(__AVX512VL__) && defined(__AVX512DQ__))
  166. #include <avx512vldqintrin.h>
  167. #endif
  168. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
  169. #include <avx512erintrin.h>
  170. #endif
  171. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
  172. #include <avx512ifmaintrin.h>
  173. #endif
  174. #if !defined(_MSC_VER) || __has_feature(modules) || \
  175. (defined(__AVX512IFMA__) && defined(__AVX512VL__))
  176. #include <avx512ifmavlintrin.h>
  177. #endif
  178. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
  179. #include <avx512vbmiintrin.h>
  180. #endif
  181. #if !defined(_MSC_VER) || __has_feature(modules) || \
  182. (defined(__AVX512VBMI__) && defined(__AVX512VL__))
  183. #include <avx512vbmivlintrin.h>
  184. #endif
  185. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
  186. #include <avx512vbmi2intrin.h>
  187. #endif
  188. #if !defined(_MSC_VER) || __has_feature(modules) || \
  189. (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
  190. #include <avx512vlvbmi2intrin.h>
  191. #endif
  192. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
  193. #include <avx512pfintrin.h>
  194. #endif
  195. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
  196. #include <pkuintrin.h>
  197. #endif
  198. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
  199. #include <vaesintrin.h>
  200. #endif
  201. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
  202. #include <gfniintrin.h>
  203. #endif
  204. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
  /// \brief Forwards \a __p to __builtin_ia32_rdrand16_step and returns the
  ///    builtin's int result unchanged.
  ///
  /// \param __p
  ///    Pointer to the unsigned short that is handed to the builtin.
  /// \returns The value produced by the builtin. NOTE(review): Intel's
  ///    intrinsics documentation describes this as 1 when a random value was
  ///    generated and 0 otherwise -- confirm before relying on it.
  static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
  _rdrand16_step(unsigned short *__p)
  {
    const int __status = __builtin_ia32_rdrand16_step(__p);
    return __status;
  }
  /// \brief Forwards \a __p to __builtin_ia32_rdrand32_step and returns the
  ///    builtin's int result unchanged.
  ///
  /// \param __p
  ///    Pointer to the unsigned int that is handed to the builtin.
  /// \returns The value produced by the builtin. NOTE(review): Intel's
  ///    intrinsics documentation describes this as 1 when a random value was
  ///    generated and 0 otherwise -- confirm before relying on it.
  static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
  _rdrand32_step(unsigned int *__p)
  {
    const int __status = __builtin_ia32_rdrand32_step(__p);
    return __status;
  }
  215. #ifdef __x86_64__
  /// \brief Forwards \a __p to __builtin_ia32_rdrand64_step and returns the
  ///    builtin's int result unchanged.
  ///
  /// \param __p
  ///    Pointer to the unsigned long long that is handed to the builtin.
  /// \returns The value produced by the builtin. NOTE(review): Intel's
  ///    intrinsics documentation describes this as 1 when a random value was
  ///    generated and 0 otherwise -- confirm before relying on it.
  static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
  _rdrand64_step(unsigned long long *__p)
  {
    const int __status = __builtin_ia32_rdrand64_step(__p);
    return __status;
  }
  221. #endif
  222. #endif /* __RDRND__ */
  /// \brief Returns the trailing-zero count of \a __A as computed by
  ///    __builtin_ctz, i.e. the zero-based index of its lowest set bit.
  ///
  /// \param __A
  ///    The integer to scan; it is passed to the builtin as an unsigned int.
  /// \returns The number of trailing zero bits in \a __A. NOTE(review): the
  ///    compiler documentation leaves __builtin_ctz undefined for a zero
  ///    argument, so callers should presumably pass a non-zero value.
  static __inline__ int __attribute__((__always_inline__, __nodebug__))
  _bit_scan_forward(int __A)
  {
    const unsigned int __bits = (unsigned int)__A;
    return __builtin_ctz(__bits);
  }
  /// \brief Returns 31 minus the leading-zero count that __builtin_clz reports
  ///    for \a __A.
  ///
  /// \param __A
  ///    The integer to scan; it is passed to the builtin as an unsigned int.
  /// \returns 31 - (leading zero bits of \a __A). With a 32-bit unsigned int
  ///    this is the zero-based index of the highest set bit. NOTE(review): the
  ///    compiler documentation leaves __builtin_clz undefined for a zero
  ///    argument, so callers should presumably pass a non-zero value.
  static __inline__ int __attribute__((__always_inline__, __nodebug__))
  _bit_scan_reverse(int __A)
  {
    const unsigned int __bits = (unsigned int)__A;
    const int __leading_zeros = __builtin_clz(__bits);
    return 31 - __leading_zeros;
  }
  233. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
  234. #ifdef __x86_64__
  /// \brief Calls __builtin_ia32_rdfsbase32 and returns its result.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \returns The unsigned int produced by the builtin.
  static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _readfsbase_u32(void)
  {
    const unsigned int __base = __builtin_ia32_rdfsbase32();
    return __base;
  }
  /// \brief Calls __builtin_ia32_rdfsbase64 and returns its result.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \returns The unsigned long long produced by the builtin.
  static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _readfsbase_u64(void)
  {
    const unsigned long long __base = __builtin_ia32_rdfsbase64();
    return __base;
  }
  /// \brief Calls __builtin_ia32_rdgsbase32 and returns its result.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \returns The unsigned int produced by the builtin.
  static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _readgsbase_u32(void)
  {
    const unsigned int __base = __builtin_ia32_rdgsbase32();
    return __base;
  }
  /// \brief Calls __builtin_ia32_rdgsbase64 and returns its result.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \returns The unsigned long long produced by the builtin.
  static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _readgsbase_u64(void)
  {
    const unsigned long long __base = __builtin_ia32_rdgsbase64();
    return __base;
  }
  /// \brief Passes \a __V to __builtin_ia32_wrfsbase32.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \param __V
  ///    The unsigned int value forwarded to the builtin.
  static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _writefsbase_u32(unsigned int __V)
  {
    /* Plain call, not `return <expr>;`: ISO C forbids a return statement with
       an expression in a function whose return type is void. */
    __builtin_ia32_wrfsbase32(__V);
  }
  /// \brief Passes \a __V to __builtin_ia32_wrfsbase64.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \param __V
  ///    The unsigned long long value forwarded to the builtin.
  static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _writefsbase_u64(unsigned long long __V)
  {
    /* Plain call, not `return <expr>;`: ISO C forbids a return statement with
       an expression in a function whose return type is void. */
    __builtin_ia32_wrfsbase64(__V);
  }
  /// \brief Passes \a __V to __builtin_ia32_wrgsbase32.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \param __V
  ///    The unsigned int value forwarded to the builtin.
  static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _writegsbase_u32(unsigned int __V)
  {
    /* Plain call, not `return <expr>;`: ISO C forbids a return statement with
       an expression in a function whose return type is void. */
    __builtin_ia32_wrgsbase32(__V);
  }
  /// \brief Passes \a __V to __builtin_ia32_wrgsbase64.
  ///
  /// Only defined for x86-64 builds; compiled with the "fsgsbase" target
  /// feature.
  ///
  /// \param __V
  ///    The unsigned long long value forwarded to the builtin.
  static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
  _writegsbase_u64(unsigned long long __V)
  {
    /* Plain call, not `return <expr>;`: ISO C forbids a return statement with
       an expression in a function whose return type is void. */
    __builtin_ia32_wrgsbase64(__V);
  }
  275. #endif
  276. #endif /* __FSGSBASE__ */
  277. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
  278. #include <rtmintrin.h>
  279. #include <xtestintrin.h>
  280. #endif
  281. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
  282. #include <shaintrin.h>
  283. #endif
  284. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
  285. #include <fxsrintrin.h>
  286. #endif
  287. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
  288. #include <xsaveintrin.h>
  289. #endif
  290. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
  291. #include <xsaveoptintrin.h>
  292. #endif
  293. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
  294. #include <xsavecintrin.h>
  295. #endif
  296. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
  297. #include <xsavesintrin.h>
  298. #endif
  299. #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
  300. #include <cetintrin.h>
  301. #endif
  302. /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
  303. * whereas others are also available at all times. */
  304. #include <adxintrin.h>
  305. #endif /* __IMMINTRIN_H */