17#if defined(HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
20#undef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
22#define HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
41template <
class D,
class V>
43template <
class D,
class V>
56template <
class D,
class V>
58template <
class D,
class V>
71template <
class D,
class V>
73template <
class D,
class V>
86template <
class D,
class V>
88template <
class D,
class V>
101template <
class D,
class V>
103template <
class D,
class V>
116template <
class D,
class V>
118template <
class D,
class V>
125#ifndef HWY_HAVE_ATAN2
126#define HWY_HAVE_ATAN2 1
136template <
class D,
class V = VFromD<D>,
class M = MFromD<D>,
137 typename T = TFromD<D>>
139 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
140 const V kPi =
Set(
d,
static_cast<T
>(+3.14159265358979323846264));
141 const V kPi2 =
Mul(kPi, kHalf);
143 const V k0 =
Zero(
d);
144 const M y_0 =
Eq(y, k0);
145 const M x_0 =
Eq(x, k0);
146 const M x_neg =
Lt(x, k0);
147 const M y_inf =
IsInf(y);
148 const M x_inf =
IsInf(x);
153 const V if_yinf =
Mul(kHalf,
IfThenElse(x_inf,
Add(kPi2, if_xneg_pi), kPi));
166template <
class D,
class V>
179template <
class D,
class V>
181template <
class D,
class V>
194template <
class D,
class V>
196template <
class D,
class V>
209template <
class D,
class V>
211template <
class D,
class V>
224template <
class D,
class V>
226template <
class D,
class V>
239template <
class D,
class V>
241template <
class D,
class V>
254template <
class D,
class V>
256template <
class D,
class V>
269template <
class D,
class V>
271template <
class D,
class V>
284template <
class D,
class V>
286template <
class D,
class V>
299template <
class D,
class V>
301template <
class D,
class V>
314template <
class D,
class V>
316template <
class D,
class V>
331template <
class D,
class V>
333template <
class D,
class V>
402 T c6, T c7, T c8, T c9) {
412 T c6, T c7, T c8, T c9, T c10) {
422 T c6, T c7, T c8, T c9, T c10, T c11) {
432 T c6, T c7, T c8, T c9, T c10, T c11,
438 x8,
MulAdd(x4, c12,
MulAdd(x2,
MulAdd(c11, x, c10),
MulAdd(c9, x, c8))),
444 T c6, T c7, T c8, T c9, T c10, T c11,
457 T c6, T c7, T c8, T c9, T c10, T c11,
458 T c12, T c13, T c14) {
470 T c6, T c7, T c8, T c9, T c10, T c11,
471 T c12, T c13, T c14, T c15) {
483 T c6, T c7, T c8, T c9, T c10, T c11,
484 T c12, T c13, T c14, T c15, T c16) {
499 T c6, T c7, T c8, T c9, T c10, T c11,
500 T c12, T c13, T c14, T c15, T c16, T c17) {
515 T c6, T c7, T c8, T c9, T c10, T c11,
516 T c12, T c13, T c14, T c15, T c16, T c17,
531template <
class FloatOrDouble>
533template <
class FloatOrDouble>
535template <
class FloatOrDouble>
537template <
class FloatOrDouble>
539template <
class FloatOrDouble>
541template <
class FloatOrDouble>
547 template <
class D,
class V>
549 const auto k0 =
Set(
d, +0.1666677296f);
550 const auto k1 =
Set(
d, +0.07495029271f);
551 const auto k2 =
Set(
d, +0.04547423869f);
552 const auto k3 =
Set(
d, +0.02424046025f);
553 const auto k4 =
Set(
d, +0.04197454825f);
555 return Estrin(x2, k0, k1, k2, k3, k4);
559#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
562struct AsinImpl<double> {
564 template <
class D,
class V>
566 const auto k0 =
Set(
d, +0.1666666666666497543);
567 const auto k1 =
Set(
d, +0.07500000000378581611);
568 const auto k2 =
Set(
d, +0.04464285681377102438);
569 const auto k3 =
Set(
d, +0.03038195928038132237);
570 const auto k4 =
Set(
d, +0.02237176181932048341);
571 const auto k5 =
Set(
d, +0.01735956991223614604);
572 const auto k6 =
Set(
d, +0.01388715184501609218);
573 const auto k7 =
Set(
d, +0.01215360525577377331);
574 const auto k8 =
Set(
d, +0.006606077476277170610);
575 const auto k9 =
Set(
d, +0.01929045477267910674);
576 const auto k10 =
Set(
d, -0.01581918243329996643);
577 const auto k11 =
Set(
d, +0.03161587650653934628);
579 return Estrin(x2, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11);
588 template <
class D,
class V>
590 const auto k0 =
Set(
d, -0.333331018686294555664062f);
591 const auto k1 =
Set(
d, +0.199926957488059997558594f);
592 const auto k2 =
Set(
d, -0.142027363181114196777344f);
593 const auto k3 =
Set(
d, +0.106347933411598205566406f);
594 const auto k4 =
Set(
d, -0.0748900920152664184570312f);
595 const auto k5 =
Set(
d, +0.0425049886107444763183594f);
596 const auto k6 =
Set(
d, -0.0159569028764963150024414f);
597 const auto k7 =
Set(
d, +0.00282363896258175373077393f);
599 const auto y =
Mul(x, x);
600 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7),
Mul(y, x), x);
604#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
607struct AtanImpl<double> {
609 template <
class D,
class V>
611 const auto k0 =
Set(
d, -0.333333333333311110369124);
612 const auto k1 =
Set(
d, +0.199999999996591265594148);
613 const auto k2 =
Set(
d, -0.14285714266771329383765);
614 const auto k3 =
Set(
d, +0.111111105648261418443745);
615 const auto k4 =
Set(
d, -0.090908995008245008229153);
616 const auto k5 =
Set(
d, +0.0769219538311769618355029);
617 const auto k6 =
Set(
d, -0.0666573579361080525984562);
618 const auto k7 =
Set(
d, +0.0587666392926673580854313);
619 const auto k8 =
Set(
d, -0.0523674852303482457616113);
620 const auto k9 =
Set(
d, +0.0466667150077840625632675);
621 const auto k10 =
Set(
d, -0.0407629191276836500001934);
622 const auto k11 =
Set(
d, +0.0337852580001353069993897);
623 const auto k12 =
Set(
d, -0.0254517624932312641616861);
624 const auto k13 =
Set(
d, +0.016599329773529201970117);
625 const auto k14 =
Set(
d, -0.00889896195887655491740809);
626 const auto k15 =
Set(
d, +0.00370026744188713119232403);
627 const auto k16 =
Set(
d, -0.00110611831486672482563471);
628 const auto k17 =
Set(
d, +0.000209850076645816976906797);
629 const auto k18 =
Set(
d, -1.88796008463073496563746e-5);
631 const auto y =
Mul(x, x);
632 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11,
633 k12, k13, k14, k15, k16, k17, k18),
643 template <
class D,
class V>
648 template <
class D,
class V>
650 const auto k0 =
Set(
d, -1.66666597127914428710938e-1f);
651 const auto k1 =
Set(
d, +8.33307858556509017944336e-3f);
652 const auto k2 =
Set(
d, -1.981069071916863322258e-4f);
653 const auto k3 =
Set(
d, +2.6083159809786593541503e-6f);
655 const auto y =
Mul(x, x);
659 template <
class D,
class V,
class VI32>
662 const V kHalfPiPart0f =
Set(
d, -0.5f * 3.140625f);
663 const V kHalfPiPart1f =
Set(
d, -0.5f * 0.0009670257568359375f);
664 const V kHalfPiPart2f =
Set(
d, -0.5f * 6.2771141529083251953e-7f);
665 const V kHalfPiPart3f =
Set(
d, -0.5f * 1.2154201256553420762e-10f);
669 x =
MulAdd(qf, kHalfPiPart0f, x);
670 x =
MulAdd(qf, kHalfPiPart1f, x);
671 x =
MulAdd(qf, kHalfPiPart2f, x);
672 x =
MulAdd(qf, kHalfPiPart3f, x);
676 template <
class D,
class V,
class VI32>
679 const V kPiPart0f =
Set(
d, -3.140625f);
680 const V kPiPart1f =
Set(
d, -0.0009670257568359375f);
681 const V kPiPart2f =
Set(
d, -6.2771141529083251953e-7f);
682 const V kPiPart3f =
Set(
d, -1.2154201256553420762e-10f);
686 x =
MulAdd(qf, kPiPart0f, x);
687 x =
MulAdd(qf, kPiPart1f, x);
688 x =
MulAdd(qf, kPiPart2f, x);
689 x =
MulAdd(qf, kPiPart3f, x);
694 template <
class D,
class VI32>
701 template <
class D,
class VI32>
708#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
711struct CosSinImpl<double> {
713 template <
class D,
class V>
718 template <
class D,
class V>
720 const auto k0 =
Set(
d, -0.166666666666666657414808);
721 const auto k1 =
Set(
d, +0.00833333333333332974823815);
722 const auto k2 =
Set(
d, -0.000198412698412696162806809);
723 const auto k3 =
Set(
d, +2.75573192239198747630416e-6);
724 const auto k4 =
Set(
d, -2.50521083763502045810755e-8);
725 const auto k5 =
Set(
d, +1.60590430605664501629054e-10);
726 const auto k6 =
Set(
d, -7.64712219118158833288484e-13);
727 const auto k7 =
Set(
d, +2.81009972710863200091251e-15);
728 const auto k8 =
Set(
d, -7.97255955009037868891952e-18);
730 const auto y =
Mul(x, x);
731 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8),
Mul(y, x), x);
734 template <
class D,
class V,
class VI32>
737 const V kHalfPiPart0d =
Set(
d, -0.5 * 3.1415926218032836914);
738 const V kHalfPiPart1d =
Set(
d, -0.5 * 3.1786509424591713469e-8);
739 const V kHalfPiPart2d =
Set(
d, -0.5 * 1.2246467864107188502e-16);
740 const V kHalfPiPart3d =
Set(
d, -0.5 * 1.2736634327021899816e-24);
744 x =
MulAdd(qf, kHalfPiPart0d, x);
745 x =
MulAdd(qf, kHalfPiPart1d, x);
746 x =
MulAdd(qf, kHalfPiPart2d, x);
747 x =
MulAdd(qf, kHalfPiPart3d, x);
751 template <
class D,
class V,
class VI32>
754 const V kPiPart0d =
Set(
d, -3.1415926218032836914);
755 const V kPiPart1d =
Set(
d, -3.1786509424591713469e-8);
756 const V kPiPart2d =
Set(
d, -1.2246467864107188502e-16);
757 const V kPiPart3d =
Set(
d, -1.2736634327021899816e-24);
761 x =
MulAdd(qf, kPiPart0d, x);
762 x =
MulAdd(qf, kPiPart1d, x);
763 x =
MulAdd(qf, kPiPart2d, x);
764 x =
MulAdd(qf, kPiPart3d, x);
769 template <
class D,
class VI32>
770 HWY_INLINE Vec<Rebind<double, D>> CosSignFromQuadrant(D
d, VI32 q) {
771 const VI32 kTwo =
Set(Rebind<int32_t, D>(), 2);
777 template <
class D,
class VI32>
778 HWY_INLINE Vec<Rebind<double, D>> SinSignFromQuadrant(D
d, VI32 q) {
779 const VI32 kOne =
Set(Rebind<int32_t, D>(), 1);
781 d, ShiftLeft<63>(
PromoteTo(Rebind<int64_t, D>(),
And(q, kOne))));
790 template <
class D,
class V>
795 template <
class D,
class V>
797 const auto k0 =
Set(
d, +0.5f);
798 const auto k1 =
Set(
d, +0.166666671633720397949219f);
799 const auto k2 =
Set(
d, +0.0416664853692054748535156f);
800 const auto k3 =
Set(
d, +0.00833336077630519866943359f);
801 const auto k4 =
Set(
d, +0.00139304355252534151077271f);
802 const auto k5 =
Set(
d, +0.000198527617612853646278381f);
808 template <
class D,
class VI32>
811 const VI32 kOffset =
Set(di32, 0x7F);
816 template <
class D,
class V,
class VI32>
818 const VI32 y = ShiftRight<1>(e);
819 return Mul(
Mul(x, Pow2I(
d, y)), Pow2I(
d,
Sub(e, y)));
822 template <
class D,
class V,
class VI32>
825 const V kLn2Part0f =
Set(
d, -0.693145751953125f);
826 const V kLn2Part1f =
Set(
d, -1.428606765330187045e-6f);
830 x =
MulAdd(qf, kLn2Part0f, x);
831 x =
MulAdd(qf, kLn2Part1f, x);
838 template <
class D,
class V>
842 const auto kBias =
Set(di32, 0x7F);
847 template <
class D,
class V>
849 const V k0 =
Set(
d, 0.66666662693f);
850 const V k1 =
Set(
d, 0.40000972152f);
851 const V k2 =
Set(
d, 0.28498786688f);
852 const V k3 =
Set(
d, 0.24279078841f);
854 const V x2 =
Mul(x, x);
855 const V x4 =
Mul(x2, x2);
860#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
862struct ExpImpl<double> {
864 template <
class D,
class V>
869 template <
class D,
class V>
871 const auto k0 =
Set(
d, +0.5);
872 const auto k1 =
Set(
d, +0.166666666666666851703837);
873 const auto k2 =
Set(
d, +0.0416666666666665047591422);
874 const auto k3 =
Set(
d, +0.00833333333331652721664984);
875 const auto k4 =
Set(
d, +0.00138888888889774492207962);
876 const auto k5 =
Set(
d, +0.000198412698960509205564975);
877 const auto k6 =
Set(
d, +2.4801587159235472998791e-5);
878 const auto k7 =
Set(
d, +2.75572362911928827629423e-6);
879 const auto k8 =
Set(
d, +2.75573911234900471893338e-7);
880 const auto k9 =
Set(
d, +2.51112930892876518610661e-8);
881 const auto k10 =
Set(
d, +2.08860621107283687536341e-9);
883 return MulAdd(
Estrin(x, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10),
888 template <
class D,
class VI32>
890 const Rebind<int32_t, D> di32;
891 const Rebind<int64_t, D> di64;
892 const VI32 kOffset =
Set(di32, 0x3FF);
897 template <
class D,
class V,
class VI32>
899 const VI32 y = ShiftRight<1>(e);
900 return Mul(
Mul(x, Pow2I(
d, y)), Pow2I(
d,
Sub(e, y)));
903 template <
class D,
class V,
class VI32>
906 const V kLn2Part0d =
Set(
d, -0.6931471805596629565116018);
907 const V kLn2Part1d =
Set(
d, -0.28235290563031577122588448175e-12);
911 x =
MulAdd(qf, kLn2Part0d, x);
912 x =
MulAdd(qf, kLn2Part1d, x);
918struct LogImpl<double> {
919 template <
class D,
class V>
920 HWY_INLINE Vec<Rebind<int64_t, D>> Log2p1NoSubnormal(D , V x) {
921 const Rebind<int64_t, D> di64;
922 const Rebind<uint64_t, D> du64;
928 template <
class D,
class V>
930 const V k0 =
Set(
d, 0.6666666666666735130);
931 const V k1 =
Set(
d, 0.3999999999940941908);
932 const V k2 =
Set(
d, 0.2857142874366239149);
933 const V k3 =
Set(
d, 0.2222219843214978396);
934 const V k4 =
Set(
d, 0.1818357216161805012);
935 const V k5 =
Set(
d, 0.1531383769920937332);
936 const V k6 =
Set(
d, 0.1479819860511658591);
938 const V x2 =
Mul(x, x);
939 const V x4 =
Mul(x2, x2);
947template <
class D,
class V,
bool kAllowSubnormals = true>
953 constexpr bool kIsF32 = (
sizeof(T) == 4);
956 const V kLn2Hi =
Set(
d, kIsF32 ?
static_cast<T
>(0.69313812256f)
957 :
static_cast<T
>(0.693147180369123816490));
958 const V kLn2Lo =
Set(
d, kIsF32 ?
static_cast<T
>(9.0580006145e-6f)
959 :
static_cast<T
>(1.90821492927058770002e-10));
960 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
961 const V kMinNormal =
Set(
d, kIsF32 ?
static_cast<T
>(1.175494351e-38f)
962 :
static_cast<T
>(2.2250738585072014e-308));
963 const V kScale =
Set(
d, kIsF32 ?
static_cast<T
>(3.355443200e+7f)
964 :
static_cast<T
>(1.8014398509481984e+16));
969 using VI =
decltype(
Zero(di));
970 const VI kLowerBits =
Set(di, kIsF32 ?
static_cast<TI
>(0x00000000L)
971 :
static_cast<TI
>(0xFFFFFFFFLL));
972 const VI kMagic =
Set(di, kIsF32 ?
static_cast<TI
>(0x3F3504F3L)
973 :
static_cast<TI
>(0x3FE6A09E00000000LL));
974 const VI kExpMask =
Set(di, kIsF32 ?
static_cast<TI
>(0x3F800000L)
975 :
static_cast<TI
>(0x3FF0000000000000LL));
977 Set(di, kIsF32 ?
static_cast<TI
>(-25) :
static_cast<TI
>(-54));
978 const VI kManMask =
Set(di, kIsF32 ?
static_cast<TI
>(0x7FFFFFL)
979 :
static_cast<TI
>(0xFFFFF00000000LL));
984 if (kAllowSubnormals ==
true) {
985 const auto is_denormal =
Lt(x, kMinNormal);
993 d,
Add(exp_scale, impl.Log2p1NoSubnormal(
d,
BitCast(
d, exp_bits))));
1005 const V ym1 =
Sub(y, kOne);
1006 const V z =
Div(ym1,
Add(y, kOne));
1018template <
class D,
class V>
1025 using VI =
decltype(
Zero(di));
1028 static constexpr size_t bits =
sizeof(TI) * 8;
1029 const VI sign_mask =
SignBit(di);
1030 const VI ci_0 =
Zero(di);
1031 const VI ci_1 =
Set(di, 1);
1032 const VI ci_2 =
Set(di, 2);
1033 const VI ci_4 =
Set(di, 4);
1034 const V cos_p0 =
Set(
d, ConvertScalarTo<T>(2.443315711809948E-005));
1035 const V cos_p1 =
Set(
d, ConvertScalarTo<T>(-1.388731625493765E-003));
1036 const V cos_p2 =
Set(
d, ConvertScalarTo<T>(4.166664568298827E-002));
1037 const V sin_p0 =
Set(
d, ConvertScalarTo<T>(-1.9515295891E-4));
1038 const V sin_p1 =
Set(
d, ConvertScalarTo<T>(8.3321608736E-3));
1039 const V sin_p2 =
Set(
d, ConvertScalarTo<T>(-1.6666654611E-1));
1040 const V FOPI =
Set(
d, ConvertScalarTo<T>(1.27323954473516));
1041 const V DP1 =
Set(
d, dp1);
1042 const V DP2 =
Set(
d, dp2);
1043 const V DP3 =
Set(
d, dp3);
1045 V xmm1, xmm2, sign_bit_sin, y;
1046 VI imm0, imm2, imm4;
1052 sign_bit_sin =
And(sign_bit_sin,
BitCast(
d, sign_mask));
1061 imm2 =
Add(imm2, ci_1);
1062 imm2 =
AndNot(ci_1, imm2);
1068 imm0 =
And(imm2, ci_4);
1071 V swap_sign_bit_sin =
BitCast(
d, imm0);
1074 imm2 =
And(imm2, ci_2);
1083 imm4 =
Sub(imm4, ci_2);
1084 imm4 =
AndNot(imm4, ci_4);
1089 sign_bit_sin =
Xor(sign_bit_sin, swap_sign_bit_sin);
1094 y =
MulAdd(cos_p0, z, cos_p1);
1095 y =
MulAdd(y, z, cos_p2);
1102 V y2 =
MulAdd(sin_p0, z, sin_p1);
1103 y2 =
MulAdd(y2, z, sin_p2);
1112 s =
Xor(xmm1, sign_bit_sin);
1113 c =
Xor(xmm2, sign_bit_cos);
1117template <
class D,
class V>
1124 using VI =
decltype(
Zero(di));
1127 static constexpr size_t bits =
sizeof(TI) * 8;
1128 const VI sign_mask =
SignBit(di);
1129 const VI ci_0 =
Zero(di);
1130 const VI ci_1 =
Set(di, 1);
1131 const VI ci_2 =
Set(di, 2);
1132 const VI ci_4 =
Set(di, 4);
1133 const V cos_p0 =
Set(
d, ConvertScalarTo<T>(-1.13585365213876817300E-11));
1134 const V cos_p1 =
Set(
d, ConvertScalarTo<T>(2.08757008419747316778E-9));
1135 const V cos_p2 =
Set(
d, ConvertScalarTo<T>(-2.75573141792967388112E-7));
1136 const V cos_p3 =
Set(
d, ConvertScalarTo<T>(2.48015872888517045348E-5));
1137 const V cos_p4 =
Set(
d, ConvertScalarTo<T>(-1.38888888888730564116E-3));
1138 const V cos_p5 =
Set(
d, ConvertScalarTo<T>(4.16666666666665929218E-2));
1139 const V sin_p0 =
Set(
d, ConvertScalarTo<T>(1.58962301576546568060E-10));
1140 const V sin_p1 =
Set(
d, ConvertScalarTo<T>(-2.50507477628578072866E-8));
1141 const V sin_p2 =
Set(
d, ConvertScalarTo<T>(2.75573136213857245213E-6));
1142 const V sin_p3 =
Set(
d, ConvertScalarTo<T>(-1.98412698295895385996E-4));
1143 const V sin_p4 =
Set(
d, ConvertScalarTo<T>(8.33333333332211858878E-3));
1144 const V sin_p5 =
Set(
d, ConvertScalarTo<T>(-1.66666666666666307295E-1));
1146 Set(
d, ConvertScalarTo<T>(1.2732395447351626861510701069801148));
1147 const V DP1 =
Set(
d, dp1);
1148 const V DP2 =
Set(
d, dp2);
1149 const V DP3 =
Set(
d, dp3);
1151 V xmm1, xmm2, sign_bit_sin, y;
1152 VI imm0, imm2, imm4;
1158 sign_bit_sin =
And(sign_bit_sin,
BitCast(
d, sign_mask));
1167 imm2 =
Add(imm2, ci_1);
1168 imm2 =
AndNot(ci_1, imm2);
1174 imm0 =
And(imm2, ci_4);
1177 V swap_sign_bit_sin =
BitCast(
d, imm0);
1180 imm2 =
And(imm2, ci_2);
1189 imm4 =
Sub(imm4, ci_2);
1190 imm4 =
AndNot(imm4, ci_4);
1194 sign_bit_sin =
Xor(sign_bit_sin, swap_sign_bit_sin);
1199 y =
MulAdd(cos_p0, z, cos_p1);
1200 y =
MulAdd(y, z, cos_p2);
1201 y =
MulAdd(y, z, cos_p3);
1202 y =
MulAdd(y, z, cos_p4);
1203 y =
MulAdd(y, z, cos_p5);
1210 V y2 =
MulAdd(sin_p0, z, sin_p1);
1211 y2 =
MulAdd(y2, z, sin_p2);
1212 y2 =
MulAdd(y2, z, sin_p3);
1213 y2 =
MulAdd(y2, z, sin_p4);
1214 y2 =
MulAdd(y2, z, sin_p5);
1223 s =
Xor(xmm1, sign_bit_sin);
1224 c =
Xor(xmm2, sign_bit_cos);
1229 template <
class D,
class V>
1231 SinCos3(
d, -0.78515625f, -2.4187564849853515625e-4f,
1232 -3.77489497744594108e-8f, x, s, c);
1236#if HWY_HAVE_FLOAT64 && HWY_HAVE_INTEGER64
1238struct SinCosImpl<double> {
1239 template <
class D,
class V>
1241 SinCos6(
d, -7.85398125648498535156E-1, -3.77489470793079817668E-8,
1242 -2.69515142907905952645E-15, x, s, c);
1249template <
class D,
class V>
1253 const V kZero =
Zero(
d);
1254 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1255 const V kPi =
Set(
d,
static_cast<T
>(+3.14159265358979323846264));
1256 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
1259 const V abs_x =
Xor(x, sign_x);
1260 const auto mask =
Lt(abs_x, kHalf);
1266 const V t =
Mul(impl.AsinPoly(
d, yy, y),
Mul(y, yy));
1268 const V t_plus_y =
Add(t, y);
1271 Add(t_plus_y, t_plus_y));
1275template <
class D,
class V>
1279 const V kLarge =
Set(
d,
static_cast<T
>(268435456.0));
1280 const V kLog2 =
Set(
d,
static_cast<T
>(0.693147180559945286227));
1281 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1282 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1284 const auto is_x_large =
Gt(x, kLarge);
1285 const auto is_x_gt_2 =
Gt(x, kTwo);
1287 const V x_minus_1 =
Sub(x, kOne);
1295 const auto is_pole =
Eq(y2, kOne);
1302template <
class D,
class V>
1306 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1307 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1308 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
1311 const V abs_x =
Xor(x, sign_x);
1312 const auto mask =
Lt(abs_x, kHalf);
1318 const V z0 =
MulAdd(impl.AsinPoly(
d, yy, y),
Mul(yy, y), y);
1319 const V z1 =
NegMulAdd(z0, kTwo, kPiOverTwo);
1323template <
class D,
class V>
1327 const V kSmall =
Set(
d,
static_cast<T
>(1.0 / 268435456.0));
1328 const V kLarge =
Set(
d,
static_cast<T
>(268435456.0));
1329 const V kLog2 =
Set(
d,
static_cast<T
>(0.693147180559945286227));
1330 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1331 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1334 const V abs_x =
Xor(x, sign_x);
1336 const auto is_x_large =
Gt(abs_x, kLarge);
1337 const auto is_x_lt_2 =
Lt(abs_x, kTwo);
1339 const V x2 =
Mul(x, x);
1340 const V sqrt_x2_plus_1 =
Sqrt(
Add(x2, kOne));
1342 const V y0 =
MulAdd(abs_x, kTwo,
Div(kOne,
Add(sqrt_x2_plus_1, abs_x)));
1343 const V y1 =
Add(
Div(x2,
Add(sqrt_x2_plus_1, kOne)), abs_x);
1348 const auto is_pole =
Eq(y2, kOne);
1356template <
class D,
class V>
1360 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1361 const V kPiOverTwo =
Set(
d,
static_cast<T
>(+1.57079632679489661923132169));
1364 const V abs_x =
Xor(x, sign);
1365 const auto mask =
Gt(abs_x, kOne);
1368 const auto divisor =
IfThenElse(mask, abs_x, kOne);
1369 const V y = impl.AtanPoly(
d,
IfThenElse(mask,
Div(kOne, divisor), abs_x));
1373template <
class D,
class V>
1377 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1378 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1381 const V abs_x =
Xor(x, sign);
1386template <
class D,
class V>
1392 const V kOneOverPi =
Set(
d,
static_cast<T
>(0.31830988618379067153));
1396 using VI32 =
decltype(
Zero(di32));
1397 const VI32 kOne =
Set(di32, 1);
1402 const VI32 q =
Add(ShiftLeft<1>(impl.ToInt32(
d,
Mul(y, kOneOverPi))), kOne);
1406 d,
Xor(impl.CosReduce(
d, y, q), impl.CosSignFromQuadrant(
d, q)));
1409template <
class D,
class V>
1413 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1414 const V kLowerBound =
1415 Set(
d,
static_cast<T
>((
sizeof(T) == 4 ? -104.0 : -1000.0)));
1416 const V kNegZero =
Set(
d,
static_cast<T
>(-0.0));
1417 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1418 const V kOneOverLog2 =
Set(
d,
static_cast<T
>(+1.442695040888963407359924681));
1424 impl.ToInt32(
d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1427 const V y = impl.LoadExpShortRange(
1428 d,
Add(impl.ExpPoly(
d, impl.ExpReduce(
d, x, q)), kOne), q);
1432template <
class D,
class V>
1436 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1437 const V kLowerBound =
1438 Set(
d,
static_cast<T
>((
sizeof(T) == 4 ? -104.0 : -1000.0)));
1439 const V kLn2Over2 =
Set(
d,
static_cast<T
>(+0.346573590279972654708616));
1440 const V kNegOne =
Set(
d,
static_cast<T
>(-1.0));
1441 const V kNegZero =
Set(
d,
static_cast<T
>(-0.0));
1442 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1443 const V kOneOverLog2 =
Set(
d,
static_cast<T
>(+1.442695040888963407359924681));
1449 impl.ToInt32(
d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1452 const V y = impl.ExpPoly(
d, impl.ExpReduce(
d, x, q));
1454 Sub(impl.LoadExpShortRange(
d,
Add(y, kOne), q), kOne));
1458template <
class D,
class V>
1463template <
class D,
class V>
1466 return Mul(
Log(
d, x),
Set(
d,
static_cast<T
>(0.4342944819032518276511)));
1469template <
class D,
class V>
1472 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1474 const V y =
Add(x, kOne);
1475 const auto is_pole =
Eq(y, kOne);
1477 const auto non_pole =
1478 Mul(impl::Log<D, V, /*kAllowSubnormals=*/false>(
d, y),
Div(x, divisor));
1482template <
class D,
class V>
1485 return Mul(
Log(
d, x),
Set(
d,
static_cast<T
>(1.44269504088896340735992)));
1488template <
class D,
class V>
1494 const V kOneOverPi =
Set(
d,
static_cast<T
>(0.31830988618379067153));
1495 const V kHalf =
Set(
d,
static_cast<T
>(0.5));
1499 using VI32 =
decltype(
Zero(di32));
1501 const V abs_x =
Abs(x);
1502 const V sign_x =
Xor(abs_x, x);
1505 const VI32 q = impl.ToInt32(
d,
MulAdd(abs_x, kOneOverPi, kHalf));
1508 return impl.Poly(
d,
Xor(impl.SinReduce(
d, abs_x, q),
1509 Xor(impl.SinSignFromQuadrant(
d, q), sign_x)));
1512template <
class D,
class V>
1515 const V kHalf =
Set(
d,
static_cast<T
>(+0.5));
1516 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1517 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1520 const V abs_x =
Xor(x, sign);
1521 const V y =
Expm1(
d, abs_x);
1523 return Xor(z, sign);
1526template <
class D,
class V>
1529 const V kLimit =
Set(
d,
static_cast<T
>(18.714973875));
1530 const V kOne =
Set(
d,
static_cast<T
>(+1.0));
1531 const V kTwo =
Set(
d,
static_cast<T
>(+2.0));
1534 const V abs_x =
Xor(x, sign);
1537 return Xor(z, sign);
1540template <
class D,
class V>
1544 impl.SinCos(
d, x, s, c);
#define HWY_NOINLINE
Definition base.h:103
#define HWY_INLINE
Definition base.h:101
#define HWY_MAYBE_UNUSED
Definition base.h:113
HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1)
Definition math-inl.h:348
HWY_INLINE V Log(const D d, V x)
Definition math-inl.h:948
HWY_INLINE void SinCos6(D d, TFromD< D > dp1, TFromD< D > dp2, TFromD< D > dp3, V x, V &s, V &c)
Definition math-inl.h:1118
HWY_INLINE void SinCos3(D d, TFromD< D > dp1, TFromD< D > dp2, TFromD< D > dp3, V x, V &s, V &c)
Definition math-inl.h:1019
typename D::template Rebind< T > Rebind
Definition ops/shared-inl.h:460
HWY_NOINLINE V CallSin(const D d, VecArg< V > x)
Definition math-inl.h:287
HWY_NOINLINE V CallSinCos(const D d, VecArg< V > x, VecArg< V > &s, VecArg< V > &c)
Definition math-inl.h:334
HWY_NOINLINE V CallAsin(const D d, VecArg< V > x)
Definition math-inl.h:74
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7339
HWY_API svbool_t IsInf(const V v)
Definition arm_sve-inl.h:1709
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7331
HWY_API Mask128< T, N > IsNaN(const Vec128< T, N > v)
Definition arm_neon-inl.h:5093
D d
Definition arm_sve-inl.h:1915
HWY_NOINLINE V CallAcos(const D d, VecArg< V > x)
Definition math-inl.h:44
HWY_API V IfThenElse(MFromD< DFromV< V > > mask, V yes, V no)
Definition arm_neon-inl.h:2992
HWY_NOINLINE V CallAtan2(const D d, VecArg< V > y, VecArg< V > x)
Definition math-inl.h:167
HWY_API VFromD< D > BitCast(D d, Vec128< FromT, Repartition< FromT, D >().MaxLanes()> v)
Definition arm_neon-inl.h:1581
V VecArg
Definition ops/shared-inl.h:69
HWY_API Vec128< T, N > Sqrt(const Vec128< T, N > v)
Definition arm_neon-inl.h:2654
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2690
HWY_INLINE void SinCos(D d, V x, V &s, V &c)
Highway SIMD version of SinCos.
Definition math-inl.h:1541
HWY_API Vec< D > NaN(D d)
Definition generic_ops-inl.h:82
HWY_API Vec128< T, N > MulAdd(Vec128< T, N > mul, Vec128< T, N > x, Vec128< T, N > add)
Definition arm_neon-inl.h:2550
HWY_API Vec128< T, N > IfThenZeroElse(Mask128< T, N > mask, Vec128< T, N > no)
Definition arm_neon-inl.h:3019
HWY_INLINE V Asin(D d, V x)
Highway SIMD version of std::asin(x).
Definition math-inl.h:1303
HWY_INLINE V Cos(D d, V x)
Highway SIMD version of std::cos(x).
Definition math-inl.h:1387
HWY_INLINE V Atan2(const D d, V y, V x)
Highway SIMD version of std::atan2(y, x).
Definition math-inl.h:138
HWY_API Vec128< T, N > CopySignToAbs(Vec128< T, N > abs, Vec128< T, N > sign)
Definition arm_neon-inl.h:2932
HWY_API Vec128< T, N > MulSub(Vec128< T, N > mul, Vec128< T, N > x, Vec128< T, N > sub)
Definition arm_neon-inl.h:2612
HWY_NOINLINE V CallExpm1(const D d, VecArg< V > x)
Definition math-inl.h:212
HWY_INLINE V Acos(D d, V x)
Highway SIMD version of std::acos(x).
Definition math-inl.h:1250
HWY_API VFromD< D > Zero(D d)
Definition arm_neon-inl.h:947
HWY_API Vec128< float > ConvertTo(D, Vec128< int32_t > v)
Definition arm_neon-inl.h:3971
HWY_INLINE V Log(D d, V x)
Highway SIMD version of std::log(x).
Definition math-inl.h:1459
HWY_API Vec128< int64_t > Abs(const Vec128< int64_t > v)
Definition arm_neon-inl.h:3271
typename D::T TFromD
Definition ops/shared-inl.h:426
HWY_NOINLINE V CallLog1p(const D d, VecArg< V > x)
Definition math-inl.h:257
HWY_INLINE V Acosh(D d, V x)
Highway SIMD version of std::acosh(x).
Definition math-inl.h:1276
HWY_NOINLINE V CallLog10(const D d, VecArg< V > x)
Definition math-inl.h:242
HWY_INLINE V Tanh(D d, V x)
Highway SIMD version of std::tanh(x).
Definition math-inl.h:1527
HWY_API V Add(V a, V b)
Definition generic_ops-inl.h:7300
HWY_API Vec128< T, N > NegMulAdd(Vec128< T, N > mul, Vec128< T, N > x, Vec128< T, N > add)
Definition arm_neon-inl.h:2556
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2739
HWY_NOINLINE V CallLog2(const D d, VecArg< V > x)
Definition math-inl.h:272
HWY_NOINLINE V CallExp(const D d, VecArg< V > x)
Definition math-inl.h:197
HWY_NOINLINE V CallAtanh(const D d, VecArg< V > x)
Definition math-inl.h:119
HWY_INLINE V Exp(D d, V x)
Highway SIMD version of std::exp(x).
Definition math-inl.h:1410
HWY_API Vec64< uint16_t > DemoteTo(D, Vec128< int32_t > v)
Definition arm_neon-inl.h:4629
HWY_NOINLINE V CallAtan(const D d, VecArg< V > x)
Definition math-inl.h:104
HWY_API Vec< D > SignBit(D d)
Definition generic_ops-inl.h:75
HWY_INLINE V Log10(D d, V x)
Highway SIMD version of std::log10(x).
Definition math-inl.h:1464
HWY_INLINE V Asinh(D d, V x)
Highway SIMD version of std::asinh(x).
Definition math-inl.h:1324
HWY_NOINLINE V CallLog(const D d, VecArg< V > x)
Definition math-inl.h:227
HWY_INLINE V Log2(D d, V x)
Highway SIMD version of std::log2(x).
Definition math-inl.h:1483
HWY_API svbool_t Gt(const V a, const V b)
Definition arm_sve-inl.h:1578
HWY_API Vec128< uint16_t > PromoteTo(D, Vec64< uint8_t > v)
Definition arm_neon-inl.h:4252
HWY_NOINLINE V CallAsinh(const D d, VecArg< V > x)
Definition math-inl.h:89
decltype(MaskFromVec(Zero(D()))) Mask
Definition generic_ops-inl.h:52
HWY_API V Sub(V a, V b)
Definition generic_ops-inl.h:7304
HWY_API MFromD< DTo > RebindMask(DTo, Mask128< TFrom, NFrom > m)
Definition arm_neon-inl.h:2969
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
HWY_INLINE V Sin(D d, V x)
Highway SIMD version of std::sin(x).
Definition math-inl.h:1489
HWY_INLINE V Atanh(D d, V x)
Highway SIMD version of std::atanh(x).
Definition math-inl.h:1374
HWY_API V Div(V a, V b)
Definition arm_sve-inl.h:4639
HWY_API Vec128< T, N > IfThenElseZero(Mask128< T, N > mask, Vec128< T, N > yes)
Definition arm_neon-inl.h:3007
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2727
HWY_INLINE V Atan(D d, V x)
Highway SIMD version of std::atan(x).
Definition math-inl.h:1357
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_NOINLINE V CallCos(const D d, VecArg< V > x)
Definition math-inl.h:182
HWY_NOINLINE V CallSinh(const D d, VecArg< V > x)
Definition math-inl.h:302
HWY_INLINE V Expm1(D d, V x)
Highway SIMD version of std::expm1(x).
Definition math-inl.h:1433
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition arm_neon-inl.h:2705
HWY_API V Mul(V a, V b)
Definition generic_ops-inl.h:7309
HWY_NOINLINE V CallTanh(const D d, VecArg< V > x)
Definition math-inl.h:317
HWY_INLINE V Sinh(D d, V x)
Highway SIMD version of std::sinh(x).
Definition math-inl.h:1513
HWY_API Vec128< T, N > CopySign(Vec128< T, N > magn, Vec128< T, N > sign)
Definition arm_neon-inl.h:2924
HWY_API Vec128< T, N > ShiftLeft(Vec128< T, N > v)
Definition emu128-inl.h:476
HWY_INLINE V Log1p(D d, V x)
Highway SIMD version of std::log1p(x).
Definition math-inl.h:1470
HWY_API svbool_t Ge(const V a, const V b)
Definition arm_sve-inl.h:1582
HWY_NOINLINE V CallAcosh(const D d, VecArg< V > x)
Definition math-inl.h:59
typename detail::Relations< T >::Signed MakeSigned
Definition base.h:2080
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
HWY_INLINE V AsinPoly(D d, V x2, V)
Definition math-inl.h:548
Definition math-inl.h:532
HWY_INLINE V AtanPoly(D d, V x)
Definition math-inl.h:589
Definition math-inl.h:534
HWY_INLINE Vec< Rebind< float, D > > SinSignFromQuadrant(D d, VI32 q)
Definition math-inl.h:702
HWY_INLINE Vec< Rebind< float, D > > CosSignFromQuadrant(D d, VI32 q)
Definition math-inl.h:695
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition math-inl.h:644
HWY_INLINE V SinReduce(D d, V x, VI32 q)
Definition math-inl.h:677
HWY_INLINE V Poly(D d, V x)
Definition math-inl.h:649
HWY_INLINE V CosReduce(D d, V x, VI32 q)
Definition math-inl.h:660
Definition math-inl.h:536
HWY_INLINE Vec< D > Pow2I(D d, VI32 x)
Definition math-inl.h:809
HWY_INLINE V ExpReduce(D d, V x, VI32 q)
Definition math-inl.h:823
HWY_INLINE V ExpPoly(D d, V x)
Definition math-inl.h:796
HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e)
Definition math-inl.h:817
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition math-inl.h:791
Definition math-inl.h:538
HWY_INLINE Vec< Rebind< int32_t, D > > Log2p1NoSubnormal(D, V x)
Definition math-inl.h:839
HWY_INLINE V LogPoly(D d, V x)
Definition math-inl.h:848
Definition math-inl.h:540
HWY_INLINE void SinCos(D d, V x, V &s, V &c)
Definition math-inl.h:1230
Definition math-inl.h:542