// __clang_cuda_cmath.h
/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
  23. #ifndef __CLANG_CUDA_CMATH_H__
  24. #define __CLANG_CUDA_CMATH_H__
  25. #ifndef __CUDA__
  26. #error "This file is for CUDA compilation only."
  27. #endif
  28. #include <limits>
  29. // CUDA lets us use various std math functions on the device side. This file
  30. // works in concert with __clang_cuda_math_forward_declares.h to make this work.
  31. //
  32. // Specifically, the forward-declares header declares __device__ overloads for
  33. // these functions in the global namespace, then pulls them into namespace std
  34. // with 'using' statements. Then this file implements those functions, after
  35. // their implementations have been pulled in.
  36. //
  37. // It's important that we declare the functions in the global namespace and pull
  38. // them into namespace std with using statements, as opposed to simply declaring
  39. // these functions in namespace std, because our device functions need to
  40. // overload the standard library functions, which may be declared in the global
  41. // namespace or in std, depending on the degree of conformance of the stdlib
  42. // implementation. Declaring in the global namespace and pulling into namespace
  43. // std covers all of the known knowns.
  44. #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
  45. __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
  46. __DEVICE__ long abs(long __n) { return ::labs(__n); }
  47. __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
  48. __DEVICE__ double abs(double __x) { return ::fabs(__x); }
  49. __DEVICE__ float acos(float __x) { return ::acosf(__x); }
  50. __DEVICE__ float asin(float __x) { return ::asinf(__x); }
  51. __DEVICE__ float atan(float __x) { return ::atanf(__x); }
  52. __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
  53. __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
  54. __DEVICE__ float cos(float __x) { return ::cosf(__x); }
  55. __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
  56. __DEVICE__ float exp(float __x) { return ::expf(__x); }
  57. __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
  58. __DEVICE__ float floor(float __x) { return ::floorf(__x); }
  59. __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
  60. __DEVICE__ int fpclassify(float __x) {
  61. return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  62. FP_ZERO, __x);
  63. }
  64. __DEVICE__ int fpclassify(double __x) {
  65. return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
  66. FP_ZERO, __x);
  67. }
  68. __DEVICE__ float frexp(float __arg, int *__exp) {
  69. return ::frexpf(__arg, __exp);
  70. }
  71. // For inscrutable reasons, the CUDA headers define these functions for us on
  72. // Windows.
  73. #ifndef _MSC_VER
  74. __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
  75. __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
  76. __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
  77. // For inscrutable reasons, __finite(), the double-precision version of
  78. // __finitef, does not exist when compiling for MacOS. __isfinited is available
  79. // everywhere and is just as good.
  80. __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
  81. __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
  82. __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
  83. #endif
  84. __DEVICE__ bool isgreater(float __x, float __y) {
  85. return __builtin_isgreater(__x, __y);
  86. }
  87. __DEVICE__ bool isgreater(double __x, double __y) {
  88. return __builtin_isgreater(__x, __y);
  89. }
  90. __DEVICE__ bool isgreaterequal(float __x, float __y) {
  91. return __builtin_isgreaterequal(__x, __y);
  92. }
  93. __DEVICE__ bool isgreaterequal(double __x, double __y) {
  94. return __builtin_isgreaterequal(__x, __y);
  95. }
  96. __DEVICE__ bool isless(float __x, float __y) {
  97. return __builtin_isless(__x, __y);
  98. }
  99. __DEVICE__ bool isless(double __x, double __y) {
  100. return __builtin_isless(__x, __y);
  101. }
  102. __DEVICE__ bool islessequal(float __x, float __y) {
  103. return __builtin_islessequal(__x, __y);
  104. }
  105. __DEVICE__ bool islessequal(double __x, double __y) {
  106. return __builtin_islessequal(__x, __y);
  107. }
  108. __DEVICE__ bool islessgreater(float __x, float __y) {
  109. return __builtin_islessgreater(__x, __y);
  110. }
  111. __DEVICE__ bool islessgreater(double __x, double __y) {
  112. return __builtin_islessgreater(__x, __y);
  113. }
  114. __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
  115. __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
  116. __DEVICE__ bool isunordered(float __x, float __y) {
  117. return __builtin_isunordered(__x, __y);
  118. }
  119. __DEVICE__ bool isunordered(double __x, double __y) {
  120. return __builtin_isunordered(__x, __y);
  121. }
  122. __DEVICE__ float ldexp(float __arg, int __exp) {
  123. return ::ldexpf(__arg, __exp);
  124. }
  125. __DEVICE__ float log(float __x) { return ::logf(__x); }
  126. __DEVICE__ float log10(float __x) { return ::log10f(__x); }
  127. __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
  128. __DEVICE__ float pow(float __base, float __exp) {
  129. return ::powf(__base, __exp);
  130. }
  131. __DEVICE__ float pow(float __base, int __iexp) {
  132. return ::powif(__base, __iexp);
  133. }
  134. __DEVICE__ double pow(double __base, int __iexp) {
  135. return ::powi(__base, __iexp);
  136. }
  137. __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
  138. __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
  139. __DEVICE__ float sin(float __x) { return ::sinf(__x); }
  140. __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
  141. __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
  142. __DEVICE__ float tan(float __x) { return ::tanf(__x); }
  143. __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
  144. // Notably missing above is nexttoward. We omit it because
  145. // libdevice doesn't provide an implementation, and we don't want to be in the
  146. // business of implementing tricky libm functions in this header.
  147. // Now we've defined everything we promised we'd define in
  148. // __clang_cuda_math_forward_declares.h. We need to do two additional things to
  149. // fix up our math functions.
  150. //
  151. // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
  152. // only sin(float) and sin(double), which means that e.g. sin(0) is
  153. // ambiguous.
  154. //
  155. // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
  156. // std. These are defined in the CUDA headers in the global namespace,
  157. // independent of everything else we've done here.
  158. // We can't use std::enable_if, because we want to be pre-C++11 compatible. But
  159. // we go ahead and unconditionally define functions that are only available when
  160. // compiling for C++11 to match the behavior of the CUDA headers.
  161. template<bool __B, class __T = void>
  162. struct __clang_cuda_enable_if {};
  163. template <class __T> struct __clang_cuda_enable_if<true, __T> {
  164. typedef __T type;
  165. };
  166. // Defines an overload of __fn that accepts one integral argument, calls
  167. // __fn((double)x), and returns __retty.
  168. #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
  169. template <typename __T> \
  170. __DEVICE__ \
  171. typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
  172. __retty>::type \
  173. __fn(__T __x) { \
  174. return ::__fn((double)__x); \
  175. }
  176. // Defines an overload of __fn that accepts one two arithmetic arguments, calls
  177. // __fn((double)x, (double)y), and returns a double.
  178. //
  179. // Note this is different from OVERLOAD_1, which generates an overload that
  180. // accepts only *integral* arguments.
  181. #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
  182. template <typename __T1, typename __T2> \
  183. __DEVICE__ typename __clang_cuda_enable_if< \
  184. std::numeric_limits<__T1>::is_specialized && \
  185. std::numeric_limits<__T2>::is_specialized, \
  186. __retty>::type \
  187. __fn(__T1 __x, __T2 __y) { \
  188. return __fn((double)__x, (double)__y); \
  189. }
  190. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
  191. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
  192. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
  193. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
  194. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
  195. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
  196. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
  197. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
  198. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
  199. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
  200. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
  201. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
  202. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
  203. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
  204. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
  205. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
  206. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
  207. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
  208. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
  209. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
  210. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
  211. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
  212. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
  213. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
  214. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
  215. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
  216. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
  217. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
  218. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
  219. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
  220. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
  221. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
  222. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
  223. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
  224. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
  225. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
  226. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
  227. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
  228. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
  229. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
  230. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
  231. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
  232. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
  233. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
  234. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
  235. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
  236. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
  237. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
  238. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
  239. __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
  240. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
  241. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
  242. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
  243. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
  244. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
  245. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
  246. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
  247. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
  248. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
  249. __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
  250. #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
  251. #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
  252. // Overloads for functions that don't match the patterns expected by
  253. // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
  254. template <typename __T1, typename __T2, typename __T3>
  255. __DEVICE__ typename __clang_cuda_enable_if<
  256. std::numeric_limits<__T1>::is_specialized &&
  257. std::numeric_limits<__T2>::is_specialized &&
  258. std::numeric_limits<__T3>::is_specialized,
  259. double>::type
  260. fma(__T1 __x, __T2 __y, __T3 __z) {
  261. return std::fma((double)__x, (double)__y, (double)__z);
  262. }
  263. template <typename __T>
  264. __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
  265. double>::type
  266. frexp(__T __x, int *__exp) {
  267. return std::frexp((double)__x, __exp);
  268. }
  269. template <typename __T>
  270. __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
  271. double>::type
  272. ldexp(__T __x, int __exp) {
  273. return std::ldexp((double)__x, __exp);
  274. }
  275. template <typename __T1, typename __T2>
  276. __DEVICE__ typename __clang_cuda_enable_if<
  277. std::numeric_limits<__T1>::is_specialized &&
  278. std::numeric_limits<__T2>::is_specialized,
  279. double>::type
  280. remquo(__T1 __x, __T2 __y, int *__quo) {
  281. return std::remquo((double)__x, (double)__y, __quo);
  282. }
  283. template <typename __T>
  284. __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
  285. double>::type
  286. scalbln(__T __x, long __exp) {
  287. return std::scalbln((double)__x, __exp);
  288. }
  289. template <typename __T>
  290. __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
  291. double>::type
  292. scalbn(__T __x, int __exp) {
  293. return std::scalbn((double)__x, __exp);
  294. }
  295. // We need to define these overloads in exactly the namespace our standard
  296. // library uses (including the right inline namespace), otherwise they won't be
  297. // picked up by other functions in the standard library (e.g. functions in
  298. // <complex>). Thus the ugliness below.
  299. #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
  300. _LIBCPP_BEGIN_NAMESPACE_STD
  301. #else
  302. namespace std {
  303. #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
  304. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  305. #endif
  306. #endif
  307. // Pull the new overloads we defined above into namespace std.
  308. using ::acos;
  309. using ::acosh;
  310. using ::asin;
  311. using ::asinh;
  312. using ::atan;
  313. using ::atan2;
  314. using ::atanh;
  315. using ::cbrt;
  316. using ::ceil;
  317. using ::copysign;
  318. using ::cos;
  319. using ::cosh;
  320. using ::erf;
  321. using ::erfc;
  322. using ::exp;
  323. using ::exp2;
  324. using ::expm1;
  325. using ::fabs;
  326. using ::fdim;
  327. using ::floor;
  328. using ::fma;
  329. using ::fmax;
  330. using ::fmin;
  331. using ::fmod;
  332. using ::fpclassify;
  333. using ::frexp;
  334. using ::hypot;
  335. using ::ilogb;
  336. using ::isfinite;
  337. using ::isgreater;
  338. using ::isgreaterequal;
  339. using ::isless;
  340. using ::islessequal;
  341. using ::islessgreater;
  342. using ::isnormal;
  343. using ::isunordered;
  344. using ::ldexp;
  345. using ::lgamma;
  346. using ::llrint;
  347. using ::llround;
  348. using ::log;
  349. using ::log10;
  350. using ::log1p;
  351. using ::log2;
  352. using ::logb;
  353. using ::lrint;
  354. using ::lround;
  355. using ::nearbyint;
  356. using ::nextafter;
  357. using ::pow;
  358. using ::remainder;
  359. using ::remquo;
  360. using ::rint;
  361. using ::round;
  362. using ::scalbln;
  363. using ::scalbn;
  364. using ::signbit;
  365. using ::sin;
  366. using ::sinh;
  367. using ::sqrt;
  368. using ::tan;
  369. using ::tanh;
  370. using ::tgamma;
  371. using ::trunc;
  372. // Well this is fun: We need to pull these symbols in for libc++, but we can't
  373. // pull them in with libstdc++, because its ::isinf and ::isnan are different
  374. // than its std::isinf and std::isnan.
  375. #ifndef __GLIBCXX__
  376. using ::isinf;
  377. using ::isnan;
  378. #endif
  379. // Finally, pull the "foobarf" functions that CUDA defines in its headers into
  380. // namespace std.
  381. using ::acosf;
  382. using ::acoshf;
  383. using ::asinf;
  384. using ::asinhf;
  385. using ::atan2f;
  386. using ::atanf;
  387. using ::atanhf;
  388. using ::cbrtf;
  389. using ::ceilf;
  390. using ::copysignf;
  391. using ::cosf;
  392. using ::coshf;
  393. using ::erfcf;
  394. using ::erff;
  395. using ::exp2f;
  396. using ::expf;
  397. using ::expm1f;
  398. using ::fabsf;
  399. using ::fdimf;
  400. using ::floorf;
  401. using ::fmaf;
  402. using ::fmaxf;
  403. using ::fminf;
  404. using ::fmodf;
  405. using ::frexpf;
  406. using ::hypotf;
  407. using ::ilogbf;
  408. using ::ldexpf;
  409. using ::lgammaf;
  410. using ::llrintf;
  411. using ::llroundf;
  412. using ::log10f;
  413. using ::log1pf;
  414. using ::log2f;
  415. using ::logbf;
  416. using ::logf;
  417. using ::lrintf;
  418. using ::lroundf;
  419. using ::modff;
  420. using ::nearbyintf;
  421. using ::nextafterf;
  422. using ::powf;
  423. using ::remainderf;
  424. using ::remquof;
  425. using ::rintf;
  426. using ::roundf;
  427. using ::scalblnf;
  428. using ::scalbnf;
  429. using ::sinf;
  430. using ::sinhf;
  431. using ::sqrtf;
  432. using ::tanf;
  433. using ::tanhf;
  434. using ::tgammaf;
  435. using ::truncf;
  436. #ifdef _LIBCPP_END_NAMESPACE_STD
  437. _LIBCPP_END_NAMESPACE_STD
  438. #else
  439. #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
  440. _GLIBCXX_END_NAMESPACE_VERSION
  441. #endif
  442. } // namespace std
  443. #endif
  444. #undef __DEVICE__
  445. #endif