16#ifndef HIGHWAY_HWY_BASE_H_
17#define HIGHWAY_HWY_BASE_H_
28#if HWY_COMPILER_MSVC && defined(_MSVC_LANG) && _MSVC_LANG > __cplusplus
29#define HWY_CXX_LANG _MSVC_LANG
31#define HWY_CXX_LANG __cplusplus
37#if !defined(HWY_NO_LIBCXX)
38#ifndef __STDC_FORMAT_MACROS
39#define __STDC_FORMAT_MACROS
44#if (HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)) || HWY_COMPILER_MSVC
50#if !defined(HWY_NO_LIBCXX) && HWY_CXX_LANG > 201703L && \
51 __cpp_impl_three_way_comparison >= 201907L && defined(__has_include) && \
52 !defined(HWY_DISABLE_CXX20_THREE_WAY_COMPARE)
53#if __has_include(<compare>)
55#define HWY_HAVE_CXX20_THREE_WAY_COMPARE 1
// Stringification helpers.
// HWY_STR_IMPL turns its argument into a string literal verbatim using the
// preprocessor `#` operator (no macro expansion of the argument).
68#define HWY_STR_IMPL(macro) #macro
// HWY_STR adds one level of indirection: the argument is macro-expanded before
// being handed to HWY_STR_IMPL, so the *expanded* text is what is stringified.
69#define HWY_STR(macro) HWY_STR_IMPL(macro)
// Compiler-keyword abstraction macros, one branch of a conditional.
// NOTE(review): the #if guarding this branch is not visible in this excerpt;
// the __forceinline/__declspec/__pragma spellings suggest it is the MSVC
// branch - confirm against the full file.
75#define HWY_RESTRICT __restrict
76#define HWY_INLINE __forceinline
77#define HWY_NOINLINE __declspec(noinline)
79#define HWY_NORETURN __declspec(noreturn)
// No branch-prediction hint in this branch: both expand to the bare expression.
80#define HWY_LIKELY(expr) (expr)
81#define HWY_UNLIKELY(expr) (expr)
// Emits a pragma from within a macro via the __pragma keyword.
82#define HWY_PRAGMA(tokens) __pragma(tokens)
// Wraps `tokens` in `warning(...)`, i.e. a `#pragma warning(tokens)`.
83#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
// Takes both spellings of a diagnostic control and uses only the first (`msc`)
// argument in this branch; `gcc` is discarded.
84#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
// Expands to nothing in this branch.
85#define HWY_MAYBE_UNUSED
// Flag reporting that HWY_ASSUME_ALIGNED-style hints are unavailable here.
86#define HWY_HAS_ASSUME_ALIGNED 0
88#define HWY_MUST_USE_RESULT _Check_return_
90#define HWY_MUST_USE_RESULT
95#define HWY_RESTRICT __restrict__
99#define HWY_INLINE inline __attribute__((always_inline))
101#define HWY_INLINE inline
// Compiler-keyword abstraction macros, one branch of a conditional.
// NOTE(review): the directive selecting this branch is not visible in this
// excerpt; the __attribute__/__builtin_*/_Pragma spellings suggest it is the
// GCC/Clang-compatible branch - confirm against the full file.
103#define HWY_NOINLINE __attribute__((noinline))
104#define HWY_FLATTEN __attribute__((flatten))
105#define HWY_NORETURN __attribute__((noreturn))
// `!!(expr)` normalizes the condition to 0/1 before it is compared with the
// expected value (1 = likely taken, 0 = unlikely taken).
106#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
107#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
// Stringifies `tokens` and passes the string to the _Pragma operator, which
// allows a pragma to be produced by a macro.
108#define HWY_PRAGMA(tokens) _Pragma(#tokens)
// Produces `#pragma GCC diagnostic <tokens>`.
109#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
// Takes both spellings of a diagnostic control and uses only the second
// (`gcc`) argument in this branch; `msc` is discarded.
110#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
// Old-style attribute spellings (rather than C++ [[...]] attributes).
113#define HWY_MAYBE_UNUSED __attribute__((unused))
114#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
124#if HWY_HAS_ATTRIBUTE(__format__)
125#define HWY_FORMAT(idx_fmt, idx_arg) \
126 __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
128#define HWY_FORMAT(idx_fmt, idx_arg)
136#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
137#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
139#define HWY_ASSUME_ALIGNED(ptr, align) (ptr)
// reinterpret_cast of `ptr` to `type`, after passing the pointer through
// HWY_ASSUME_ALIGNED with an alignment of alignof(RemovePtr<type>).
// `type` is expected to be a pointer type, since RemovePtr is applied to it
// (RemovePtr is defined outside this excerpt - presumably it strips one level
// of pointer; confirm).
144#define HWY_RCAST_ALIGNED(type, ptr) \
145 reinterpret_cast<type>(HWY_ASSUME_ALIGNED((ptr), alignof(RemovePtr<type>)))
152#define HWY_PUSH_ATTRIBUTES(targets_str)
153#define HWY_POP_ATTRIBUTES
154#elif HWY_COMPILER_CLANG
155#define HWY_PUSH_ATTRIBUTES(targets_str) \
156 HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
157 apply_to = function))
158#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
159#elif HWY_COMPILER_GCC_ACTUAL
160#define HWY_PUSH_ATTRIBUTES(targets_str) \
161 HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
162#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
164#define HWY_PUSH_ATTRIBUTES(targets_str)
165#define HWY_POP_ATTRIBUTES
// Declaration prefix combining internal linkage (`static`) with the
// HWY_INLINE, HWY_FLATTEN and HWY_MAYBE_UNUSED macros; what each of those
// expands to depends on the compiler branch selected earlier in the file.
171#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
// Token pasting. HWY_CONCAT_IMPL pastes its arguments verbatim with `##`;
// HWY_CONCAT goes through it so that macro arguments are expanded first and
// the expanded tokens are the ones pasted together.
173#define HWY_CONCAT_IMPL(a, b) a##b
174#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
// Minimum/maximum as macros, so they are usable in constant expressions and in
// preprocessor-generated code. Each argument appears twice in the expansion,
// so an argument with side effects (e.g. `i++`) may be evaluated twice.
// When a == b, both macros yield `b`.
176#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
177#define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
// Loop-unrolling pragmas. HWY_UNROLL(factor) requests a specific unroll
// factor; HWY_DEFAULT_UNROLL is the variant used when no factor is chosen.
// NOTE(review): the #else before the last pair of definitions and the closing
// #endif are not visible in this excerpt.
179#if HWY_COMPILER_GCC_ACTUAL
// Produces `#pragma GCC unroll <factor>`; the default factor here is 4.
181#define HWY_UNROLL(factor) HWY_PRAGMA(GCC unroll factor)
182#define HWY_DEFAULT_UNROLL HWY_UNROLL(4)
183#elif HWY_COMPILER_CLANG || HWY_COMPILER_ICC || HWY_COMPILER_ICX
// Produces `#pragma unroll <factor>`; the default passes an empty factor,
// i.e. a bare `#pragma unroll` that leaves the count to the compiler.
184#define HWY_UNROLL(factor) HWY_PRAGMA(unroll factor)
185#define HWY_DEFAULT_UNROLL HWY_UNROLL()
// Remaining case (presumably the #else branch): both macros expand to nothing.
187#define HWY_UNROLL(factor)
188#define HWY_DEFAULT_UNROLL
// HWY_ASSUME(expr): tells the optimizer that `expr` holds, using the first
// mechanism available in the chain below.
// NOTE(review): the #else before the final definition and the closing #endif
// are not visible in this excerpt.
201#if HWY_HAS_CPP_ATTRIBUTE(assume)
// Standard attribute form.
202#define HWY_ASSUME(expr) [[assume(expr)]]
203#elif HWY_COMPILER_MSVC || HWY_COMPILER_ICC
204#define HWY_ASSUME(expr) __assume(expr)
206#elif HWY_COMPILER_CLANG && HWY_HAS_BUILTIN(__builtin_assume)
207#define HWY_ASSUME(expr) __builtin_assume(expr)
210#elif HWY_COMPILER_GCC_ACTUAL >= 405
// Emulation: `expr` IS evaluated here, and the false path is marked
// unreachable so the compiler may assume the condition is true afterwards.
211#define HWY_ASSUME(expr) \
212 ((expr) ? static_cast<void>(0) : __builtin_unreachable())
// Remaining case (presumably the #else branch): a no-op that does not
// evaluate `expr` at all.
214#define HWY_ASSUME(expr) static_cast<void>(0)
220#if HWY_ARCH_X86 && !defined(HWY_NO_LIBCXX)
221#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
// Expands to `literal` repeated four times, separated by commas, e.g. for
// use inside a brace-enclosed initializer list.
228#define HWY_REP4(literal) literal, literal, literal, literal
// Calls ::hwy::Abort with the current source file and line, followed by
// `format` and any further arguments (presumably a printf-style format string
// and its arguments - Abort's declaration is not visible in this excerpt).
// `##__VA_ARGS__` is the GNU-style extension that drops the preceding comma
// when no variadic arguments are supplied, so HWY_ABORT("msg") is valid.
233#define HWY_ABORT(format, ...) \
234 ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
237#define HWY_ASSERT(condition) \
239 if (!(condition)) { \
240 HWY_ABORT("Assert %s", #condition); \
244#if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
250#if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
256#if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER)
262#if HWY_HAS_FEATURE(undefined_behavior_sanitizer) || \
263 defined(UNDEFINED_BEHAVIOR_SANITIZER)
264#define HWY_IS_UBSAN 1
266#define HWY_IS_UBSAN 0
272#define HWY_ATTR_NO_MSAN __attribute__((no_sanitize_memory))
274#define HWY_ATTR_NO_MSAN
278#if !defined(HWY_IS_DEBUG_BUILD)
281#if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
282 HWY_IS_MSAN || HWY_IS_TSAN || HWY_IS_UBSAN || defined(__clang_analyzer__)
283#define HWY_IS_DEBUG_BUILD 1
285#define HWY_IS_DEBUG_BUILD 0
289#if HWY_IS_DEBUG_BUILD
290#define HWY_DASSERT(condition) HWY_ASSERT(condition)
292#define HWY_DASSERT(condition) \
296#if __cpp_constexpr >= 201603L
297#define HWY_CXX17_CONSTEXPR constexpr
299#define HWY_CXX17_CONSTEXPR
301#if __cpp_constexpr >= 201304L
302#define HWY_CXX14_CONSTEXPR constexpr
304#define HWY_CXX14_CONSTEXPR
307#if HWY_CXX_LANG >= 201703L
308#define HWY_IF_CONSTEXPR if constexpr
310#define HWY_IF_CONSTEXPR if
313#ifndef HWY_HAVE_CXX20_THREE_WAY_COMPARE
314#define HWY_HAVE_CXX20_THREE_WAY_COMPARE 0
321#pragma intrinsic(memcpy)
322#pragma intrinsic(memset)
326template <
size_t kBytes,
typename From,
typename To>
329 memcpy(to, from, kBytes);
331 __builtin_memcpy(to, from, kBytes);
336 size_t num_of_bytes_to_copy) {
338 memcpy(to, from, num_of_bytes_to_copy);
340 __builtin_memcpy(to, from, num_of_bytes_to_copy);
345template <
typename From,
typename To>
347 static_assert(
sizeof(From) ==
sizeof(To),
"");
348 CopyBytes<sizeof(From)>(from, to);
351template <
size_t kBytes,
typename To>
354 memset(to, 0, kBytes);
356 __builtin_memset(to, 0, kBytes);
362 memset(to, 0, num_bytes);
364 __builtin_memset(to, 0, num_bytes);
373#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
374 __riscv_v_intrinsic >= 11000
388#define HWY_ALIGN_MAX alignas(64)
389#elif HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
390 __riscv_v_intrinsic >= 11000
391#define HWY_ALIGN_MAX alignas(8)
393#define HWY_ALIGN_MAX alignas(16)
444 return a.
lo == b.
lo && a.
hi == b.
hi;
478template <
bool Condition>
485template <
bool Condition>
488template <
typename T,
typename U>
498template <
typename T,
typename U>
504template <
typename T,
typename U1,
typename U2>
509template <
bool Condition,
typename Then,
typename Else>
514template <
class Then,
class Else>
515struct IfT<false, Then, Else> {
519template <
bool Condition,
typename Then,
typename Else>
// Template-parameter constraints. Each macro expands to an unnamed, defaulted
// non-type template parameter `hwy::EnableIf<cond>* = nullptr`, intended to be
// placed in a template parameter list. (EnableIf is defined outside this
// excerpt; presumably it is only well-formed when `cond` is true, removing the
// overload otherwise - confirm.)
//
// Constraints on the total vector size in bytes, kN * sizeof(T).
// `kN` and `bytes` are pasted without parentheses, so pass simple expressions.
609#define HWY_IF_V_SIZE(T, kN, bytes) \
610 hwy::EnableIf<kN * sizeof(T) == bytes>* = nullptr
611#define HWY_IF_V_SIZE_LE(T, kN, bytes) \
612 hwy::EnableIf<kN * sizeof(T) <= bytes>* = nullptr
// The extra parentheses are required here: an unparenthesized `>` would be
// parsed as the end of the template argument list.
613#define HWY_IF_V_SIZE_GT(T, kN, bytes) \
614 hwy::EnableIf<(kN * sizeof(T) > bytes)>* = nullptr
// Constraints on the lane count kN itself (equal / at most / more than).
616#define HWY_IF_LANES(kN, lanes) hwy::EnableIf<(kN == lanes)>* = nullptr
617#define HWY_IF_LANES_LE(kN, lanes) hwy::EnableIf<(kN <= lanes)>* = nullptr
618#define HWY_IF_LANES_GT(kN, lanes) hwy::EnableIf<(kN > lanes)>* = nullptr
// Template-parameter constraints on the category of type T. Each expands to a
// defaulted `hwy::EnableIf<cond>* = nullptr` template parameter built from the
// hwy::IsSigned / IsFloat / IsFloat3264 / IsSpecialFloat / IsInteger
// predicates.
//
// UNSIGNED / NOT_UNSIGNED are exact complements based only on IsSigned.
620#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
621#define HWY_IF_NOT_UNSIGNED(T) hwy::EnableIf<hwy::IsSigned<T>()>* = nullptr
// SIGNED is narrower than NOT_UNSIGNED: it additionally rejects types for
// which IsFloat or IsSpecialFloat is true.
622#define HWY_IF_SIGNED(T) \
623 hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>() && \
624 !hwy::IsSpecialFloat<T>()>* = nullptr
625#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
626#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
627#define HWY_IF_FLOAT3264(T) hwy::EnableIf<hwy::IsFloat3264<T>()>* = nullptr
628#define HWY_IF_NOT_FLOAT3264(T) hwy::EnableIf<!hwy::IsFloat3264<T>()>* = nullptr
629#define HWY_IF_SPECIAL_FLOAT(T) \
630 hwy::EnableIf<hwy::IsSpecialFloat<T>()>* = nullptr
631#define HWY_IF_NOT_SPECIAL_FLOAT(T) \
632 hwy::EnableIf<!hwy::IsSpecialFloat<T>()>* = nullptr
// Union and its complement: FLOAT_OR_SPECIAL accepts either predicate,
// NOT_FLOAT_NOR_SPECIAL requires both to be false (De Morgan).
633#define HWY_IF_FLOAT_OR_SPECIAL(T) \
634 hwy::EnableIf<hwy::IsFloat<T>() || hwy::IsSpecialFloat<T>()>* = nullptr
635#define HWY_IF_NOT_FLOAT_NOR_SPECIAL(T) \
636 hwy::EnableIf<!hwy::IsFloat<T>() && !hwy::IsSpecialFloat<T>()>* = nullptr
637#define HWY_IF_INTEGER(T) hwy::EnableIf<hwy::IsInteger<T>()>* = nullptr
// Template-parameter constraints on sizeof(T), each expanding to a defaulted
// `hwy::EnableIf<cond>* = nullptr` template parameter.
639#define HWY_IF_T_SIZE(T, bytes) hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
640#define HWY_IF_NOT_T_SIZE(T, bytes) \
641 hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
// Accepts several sizes at once: `bit_array` is a bit set in which bit i being
// set means sizeof(T) == i is accepted. For example, (1 << 1) | (1 << 2)
// accepts 1- and 2-byte types.
645#define HWY_IF_T_SIZE_ONE_OF(T, bit_array) \
646 hwy::EnableIf<((size_t{1} << sizeof(T)) & (bit_array)) != 0>* = nullptr
647#define HWY_IF_T_SIZE_LE(T, bytes) \
648 hwy::EnableIf<(sizeof(T) <= (bytes))>* = nullptr
// Parenthesized so that `>` is not parsed as the end of the template
// argument list.
649#define HWY_IF_T_SIZE_GT(T, bytes) \
650 hwy::EnableIf<(sizeof(T) > (bytes))>* = nullptr
// Template-parameter constraints on type identity: enabled when T, after
// hwy::RemoveCvRef is applied to it, is (or is not) the same type as
// `expected`. `expected` itself is compared as given, without RemoveCvRef.
652#define HWY_IF_SAME(T, expected) \
653 hwy::EnableIf<hwy::IsSame<hwy::RemoveCvRef<T>, expected>()>* = nullptr
654#define HWY_IF_NOT_SAME(T, expected) \
655 hwy::EnableIf<!hwy::IsSame<hwy::RemoveCvRef<T>, expected>()>* = nullptr
658#define HWY_IF_SAME2(T, expected1, expected2) \
660 hwy::IsSameEither<hwy::RemoveCvRef<T>, expected1, expected2>()>* = \
// Per-type shortcuts built on HWY_IF_SAME / HWY_IF_NOT_SAME / HWY_IF_SAME2.
// Unsigned integers of a given width.
663#define HWY_IF_U8(T) HWY_IF_SAME(T, uint8_t)
664#define HWY_IF_U16(T) HWY_IF_SAME(T, uint16_t)
665#define HWY_IF_U32(T) HWY_IF_SAME(T, uint32_t)
666#define HWY_IF_U64(T) HWY_IF_SAME(T, uint64_t)
// Signed integers of a given width.
668#define HWY_IF_I8(T) HWY_IF_SAME(T, int8_t)
669#define HWY_IF_I16(T) HWY_IF_SAME(T, int16_t)
670#define HWY_IF_I32(T) HWY_IF_SAME(T, int32_t)
671#define HWY_IF_I64(T) HWY_IF_SAME(T, int64_t)
// The library's own 16-bit floating-point wrapper types, with negations.
673#define HWY_IF_BF16(T) HWY_IF_SAME(T, hwy::bfloat16_t)
674#define HWY_IF_NOT_BF16(T) HWY_IF_NOT_SAME(T, hwy::bfloat16_t)
676#define HWY_IF_F16(T) HWY_IF_SAME(T, hwy::float16_t)
677#define HWY_IF_NOT_F16(T) HWY_IF_NOT_SAME(T, hwy::float16_t)
// Built-in floating-point types.
679#define HWY_IF_F32(T) HWY_IF_SAME(T, float)
680#define HWY_IF_F64(T) HWY_IF_SAME(T, double)
// Either signedness of an integer of a given width (unsigned or signed).
684#define HWY_IF_UI8(T) HWY_IF_SAME2(T, uint8_t, int8_t)
685#define HWY_IF_UI16(T) HWY_IF_SAME2(T, uint16_t, int16_t)
686#define HWY_IF_UI32(T) HWY_IF_SAME2(T, uint32_t, int32_t)
687#define HWY_IF_UI64(T) HWY_IF_SAME2(T, uint64_t, int64_t)
// Template-parameter constraint on the number of T lanes per block: the
// vector's byte size (sizeof(T) * N) is capped at 16 bytes, divided by
// sizeof(T), and the result must equal LANES. `N` is pasted without
// parentheses, so pass a simple expression.
689#define HWY_IF_LANES_PER_BLOCK(T, N, LANES) \
690 hwy::EnableIf<HWY_MIN(sizeof(T) * N, 16) / sizeof(T) == (LANES)>* = nullptr
699 template <
class U,
class URef = U&&>
701 template <
class U,
class Arg>
705 using type =
decltype(TryAddRValRef<T>(0));
716 "DeclVal() cannot be used in an evaluated context");
729template <
class T,
size_t N>
744template <class From, class To>
750 template <
class T,
class U>
755 template <
class T,
class U,
class Arg>
760 value = (IsSame<RemoveConst<RemoveVolatile<From>>,
void>() &&
763 (IsSame<To, decltype(DeclVal<To>())>() ||
773template <
class From,
class To>
778template <
class From,
class To>
781 template <class T, class U, class = decltype(static_cast<U>(DeclVal<T>()))>
784 template <
class T,
class U,
class Arg>
789 value = IsSame<decltype(TryStaticCastTest<From, To>(0)),
hwy::SizeTag<1>>()
793template <
class From,
class To>
// Template-parameter constraint: enabled when IsStaticCastable<From, To>() is
// true. Note that IsStaticCastable is not hwy::-qualified here, so the macro
// must be used where that name resolves (e.g. inside namespace hwy).
798#define HWY_IF_CASTABLE(From, To) \
799 hwy::EnableIf<IsStaticCastable<From, To>()>* = nullptr
// Enabled when the result type of the expression `Native op T` (computed via
// decltype on DeclVal operands, so nothing is evaluated) is castable back to
// Native. `op` is a binary operator token such as + or *.
801#define HWY_IF_OP_CASTABLE(op, T, Native) \
802 HWY_IF_CASTABLE(decltype(DeclVal<Native>() op DeclVal<T>()), Native)
804template <
class T,
class From>
807 template <class T1, class T2, class = decltype(DeclVal<T1>() = DeclVal<T2>())>
810 template <
class T1,
class T2,
class Arg>
819template <
class T,
class From>
// Template-parameter constraint: enabled when IsAssignable<T, From>() is true.
// IsAssignable is not hwy::-qualified here, so the macro must be used where
// that name resolves (e.g. inside namespace hwy).
824#define HWY_IF_ASSIGNABLE(T, From) \
825 hwy::EnableIf<IsAssignable<T, From>()>* = nullptr
881 return IsIntegerLaneType<T>() || IsSame<RemoveCvRef<T>,
wchar_t>() ||
883 IsSameEither<RemoveCvRef<T>, intptr_t, uintptr_t>();
933#if defined(__cpp_char8_t) && __cpp_char8_t >= 201811L
935HWY_INLINE constexpr bool IsInteger<char8_t>() {
951#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
952#define HWY_BITCASTSCALAR_CONSTEXPR constexpr
954#define HWY_BITCASTSCALAR_CONSTEXPR
957#if __cpp_constexpr >= 201304L
958#define HWY_BITCASTSCALAR_CXX14_CONSTEXPR HWY_BITCASTSCALAR_CONSTEXPR
960#define HWY_BITCASTSCALAR_CXX14_CONSTEXPR
963#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
967struct BitCastScalarSrcCastHelper {
968 static HWY_INLINE constexpr const From& CastSrcValRef(
const From& val) {
973#if HWY_COMPILER_CLANG >= 900 && HWY_COMPILER_CLANG < 1000
975template <
class To,
class From,
979BuiltinBitCastScalar(
const From& val) {
980 static_assert(
sizeof(To) ==
sizeof(From),
981 "sizeof(To) == sizeof(From) must be true");
982 return static_cast<To
>(val);
985template <
class To,
class From,
989BuiltinBitCastScalar(
const From& val) {
990 return __builtin_bit_cast(To, val);
996template <
class To,
class From, HWY_IF_NOT_SPECIAL_FLOAT(To)>
1004#if HWY_COMPILER_CLANG >= 900 && HWY_COMPILER_CLANG < 1000
1005 return detail::BuiltinBitCastScalar<To>(
1006 detail::BitCastScalarSrcCastHelper<RemoveCvRef<From>>::CastSrcValRef(
1009 return __builtin_bit_cast(
1010 To, detail::BitCastScalarSrcCastHelper<RemoveCvRef<From>>::CastSrcValRef(
1014template <
class To,
class From, HWY_IF_SPECIAL_FLOAT(To)>
1020 return To::FromBits(BitCastScalar<uint16_t>(val));
1023template <
class To,
class From>
1034#pragma pack(push, 1)
1039#if (HWY_ARCH_ARM_A64 && !HWY_COMPILER_MSVC) || \
1040 (HWY_COMPILER_CLANG && defined(__ARM_FP) && (__ARM_FP & 2)) || \
1041 (HWY_COMPILER_GCC_ACTUAL && defined(__ARM_FP16_FORMAT_IEEE))
1042#define HWY_NEON_HAVE_F16C 1
1044#define HWY_NEON_HAVE_F16C 0
1049#if HWY_ARCH_RISCV && defined(__riscv_zvfh) && HWY_COMPILER_CLANG >= 1600
1050#define HWY_RVV_HAVE_F16_VEC 1
1052#define HWY_RVV_HAVE_F16_VEC 0
1057#if HWY_ARCH_X86 && defined(__SSE2__) && defined(__FLT16_MAX__) && \
1058 ((HWY_COMPILER_CLANG >= 1500 && !HWY_COMPILER_CLANGCL) || \
1059 HWY_COMPILER_GCC_ACTUAL >= 1200)
1060#define HWY_SSE2_HAVE_F16_TYPE 1
1062#define HWY_SSE2_HAVE_F16_TYPE 0
1065#ifndef HWY_HAVE_SCALAR_F16_TYPE
1067#if HWY_NEON_HAVE_F16C || HWY_RVV_HAVE_F16_VEC || HWY_SSE2_HAVE_F16_TYPE
1068#define HWY_HAVE_SCALAR_F16_TYPE 1
1070#define HWY_HAVE_SCALAR_F16_TYPE 0
1074#ifndef HWY_HAVE_SCALAR_F16_OPERATORS
1076#if HWY_HAVE_SCALAR_F16_TYPE && \
1077 (HWY_COMPILER_CLANG >= 1800 || HWY_COMPILER_GCC_ACTUAL >= 1200 || \
1078 (HWY_COMPILER_CLANG >= 1500 && !HWY_COMPILER_CLANGCL && \
1079 !defined(_WIN32)) || \
1081 (HWY_COMPILER_CLANG >= 900 || HWY_COMPILER_GCC_ACTUAL >= 800)))
1082#define HWY_HAVE_SCALAR_F16_OPERATORS 1
1084#define HWY_HAVE_SCALAR_F16_OPERATORS 0
1090template <
class T,
class TVal = RemoveCvRef<T>,
bool = IsSpecialFloat<TVal>()>
1093template <
class T,
class TVal>
1102template <
class T,
class TVal = RemoveCvRef<T>>
1118#if HWY_HAVE_SCALAR_F16_TYPE
1119#if HWY_RVV_HAVE_F16_VEC || HWY_SSE2_HAVE_F16_TYPE
1120 using Native = _Float16;
1121#elif HWY_NEON_HAVE_F16C
1122 using Native = __fp16;
1124#error "Logic error: condition should be 'all but NEON_HAVE_F16C'"
1129#if HWY_HAVE_SCALAR_F16_TYPE
1145#if HWY_HAVE_SCALAR_F16_TYPE
1148 constexpr float16_t(Native arg) noexcept : native(arg) {}
1149 constexpr operator Native() const noexcept {
return native; }
1152#if HWY_HAVE_SCALAR_F16_TYPE
1154 return float16_t(BitCastScalar<Native>(bits));
1172#if HWY_HAVE_SCALAR_F16_OPERATORS || HWY_IDE
1173 template <
typename T, hwy::EnableIf<!IsSame<RemoveCvRef<T>,
float16_t>() &&
1174 IsConvertible<T, Native>()>* =
nullptr>
1176 : native(
static_cast<Native
>(
static_cast<T&&
>(arg))) {}
1178 template <
typename T, hwy::EnableIf<!IsSame<RemoveCvRef<T>,
float16_t>() &&
1179 !IsConvertible<T, Native>() &&
1180 IsStaticCastable<T, Native>()>* =
nullptr>
1181 explicit constexpr float16_t(T&& arg) noexcept
1182 : native(
static_cast<Native
>(
static_cast<T&&
>(arg))) {}
1186 native =
static_cast<Native
>(native - Native{1});
1192 float16_t result = *
this;
1193 native =
static_cast<Native
>(native - Native{1});
1199 native =
static_cast<Native
>(native + Native{1});
1205 float16_t result = *
this;
1206 native =
static_cast<Native
>(native + Native{1});
1210 constexpr float16_t
operator-() const noexcept {
1211 return float16_t(
static_cast<Native
>(-native));
1213 constexpr float16_t
operator+() const noexcept {
return *
this; }
1217#define HWY_FLOAT16_BINARY_OP(op, op_func, assign_func) \
1218 constexpr float16_t op_func(const float16_t& rhs) const noexcept { \
1219 return float16_t(static_cast<Native>(native op rhs.native)); \
1221 template <typename T, HWY_IF_NOT_F16(T), \
1222 typename UnwrappedT = \
1223 detail::SpecialFloatUnwrapArithOpOperand<const T&>, \
1224 typename RawResultT = \
1225 decltype(DeclVal<Native>() op DeclVal<UnwrappedT>()), \
1226 typename ResultT = \
1227 detail::NativeSpecialFloatToWrapper<RawResultT>, \
1228 HWY_IF_CASTABLE(RawResultT, ResultT)> \
1229 constexpr ResultT op_func(const T& rhs) const noexcept(noexcept( \
1230 static_cast<ResultT>(DeclVal<Native>() op DeclVal<UnwrappedT>()))) { \
1231 return static_cast<ResultT>(native op static_cast<UnwrappedT>(rhs)); \
1233 HWY_CXX14_CONSTEXPR hwy::float16_t& assign_func( \
1234 const hwy::float16_t& rhs) noexcept { \
1235 native = static_cast<Native>(native op rhs.native); \
1238 template <typename T, HWY_IF_NOT_F16(T), \
1239 HWY_IF_OP_CASTABLE(op, const T&, Native), \
1240 HWY_IF_ASSIGNABLE( \
1241 Native, decltype(DeclVal<Native>() op DeclVal<const T&>()))> \
1242 HWY_CXX14_CONSTEXPR hwy::float16_t& assign_func(const T& rhs) noexcept( \
1244 static_cast<Native>(DeclVal<Native>() op DeclVal<const T&>()))) { \
1245 native = static_cast<Native>(native op rhs); \
1249 HWY_FLOAT16_BINARY_OP(+,
operator+,
operator+=)
1250 HWY_FLOAT16_BINARY_OP(-, operator-, operator-=)
1251 HWY_FLOAT16_BINARY_OP(*, operator*, operator*=)
1252 HWY_FLOAT16_BINARY_OP(/, operator/, operator/=)
1253#undef HWY_FLOAT16_BINARY_OP
1257static_assert(
sizeof(
hwy::float16_t) == 2,
"Wrong size of float16_t");
1259#if HWY_HAVE_SCALAR_F16_TYPE
1262#if HWY_HAVE_SCALAR_F16_OPERATORS
1264struct SpecialFloatUnwrapArithOpOperandT<T, hwy::float16_t, true> {
1265 using type = hwy::float16_t::Native;
1270struct NativeSpecialFloatToWrapperT<T, hwy::float16_t::Native> {
1277#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
1281struct BitCastScalarSrcCastHelper<hwy::float16_t> {
1282#if HWY_HAVE_SCALAR_F16_TYPE
1283 static HWY_INLINE constexpr const hwy::float16_t::Native& CastSrcValRef(
1288 static HWY_INLINE constexpr const uint16_t& CastSrcValRef(
1298#if HWY_HAVE_SCALAR_F16_OPERATORS
1299#define HWY_F16_CONSTEXPR constexpr
1301#define HWY_F16_CONSTEXPR HWY_BITCASTSCALAR_CXX14_CONSTEXPR
1305#if HWY_HAVE_SCALAR_F16_OPERATORS && !HWY_IDE
1306 return static_cast<float>(f16);
1308#if !HWY_HAVE_SCALAR_F16_OPERATORS || HWY_IDE
1309 const uint16_t bits16 = BitCastScalar<uint16_t>(f16);
1310 const uint32_t sign =
static_cast<uint32_t
>(bits16 >> 15);
1311 const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
1312 const uint32_t mantissa = bits16 & 0x3FF;
1315 if (biased_exp == 0) {
1316 const float subnormal =
1317 (1.0f / 16384) * (
static_cast<float>(mantissa) * (1.0f / 1024));
1318 return sign ? -subnormal : subnormal;
1323 const uint32_t biased_exp32 =
1324 biased_exp == 31 ? 0xFF : biased_exp + (127 - 15);
1325 const uint32_t mantissa32 = mantissa << (23 - 10);
1326 const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
1328 return BitCastScalar<float>(bits32);
1332#if HWY_IS_DEBUG_BUILD && \
1333 (HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926)
1334#if defined(__cpp_if_consteval) && __cpp_if_consteval >= 202106L
1338#define HWY_F16_FROM_F32_DASSERT(condition) \
1341 HWY_DASSERT(condition); \
1344#elif HWY_HAS_BUILTIN(__builtin_is_constant_evaluated) || \
1345 HWY_COMPILER_MSVC >= 1926
1350#define HWY_F16_FROM_F32_DASSERT(condition) \
1352 if (!__builtin_is_constant_evaluated()) { \
1353 HWY_DASSERT(condition); \
1362#define HWY_F16_FROM_F32_DASSERT(condition) \
1370#define HWY_F16_FROM_F32_DASSERT(condition) HWY_DASSERT(condition)
1375#if HWY_HAVE_SCALAR_F16_OPERATORS && !HWY_IDE
1376 return float16_t(
static_cast<float16_t::Native
>(f32));
1378#if !HWY_HAVE_SCALAR_F16_OPERATORS || HWY_IDE
1379 const uint32_t bits32 = BitCastScalar<uint32_t>(f32);
1380 const uint32_t sign = bits32 >> 31;
1381 const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
1382 constexpr uint32_t kMantissaMask = 0x7FFFFF;
1383 const uint32_t mantissa32 = bits32 & kMantissaMask;
1397 const uint32_t odd_bit = (mantissa32 >> 13) & 1;
1398 const uint32_t rounded = mantissa32 + odd_bit + 0xFFF;
1399 const bool carry = rounded >= (1u << 23);
1401 const int32_t exp =
static_cast<int32_t
>(biased_exp32) - 127 + carry;
1406 return float16_t::FromBits(
static_cast<uint16_t
>(sign << 15));
1411 const bool is_nan = (biased_exp32 == 255) && mantissa32 != 0;
1412 const bool overflowed = exp >= 16;
1413 const uint32_t biased_exp16 =
1416 const uint32_t sub_exp =
static_cast<uint32_t
>(
HWY_MAX(-14 - exp, 0));
1418 const uint32_t shifted_mantissa =
1419 (rounded & kMantissaMask) >> (23 - 10 + sub_exp);
1420 const uint32_t leading = sub_exp == 0u ? 0u : (1024u >> sub_exp);
1421 const uint32_t mantissa16 = is_nan ? 0x3FF
1423 : (leading + shifted_mantissa);
1425#if HWY_IS_DEBUG_BUILD
1429 }
else if (exp <= 15) {
1436 const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
1438 const uint16_t narrowed =
static_cast<uint16_t
>(bits16);
1439 return float16_t::FromBits(narrowed);
1444#if HWY_HAVE_SCALAR_F16_OPERATORS
1445 return float16_t(
static_cast<float16_t::Native
>(f64));
1477 static_cast<float>(BitCastScalar<double>(
static_cast<uint64_t
>(
1478 (BitCastScalar<uint64_t>(f64) & 0xFFFFFFFFE0000000ULL) |
1479 ((BitCastScalar<uint64_t>(f64) + 0x000000001FFFFFFFULL) &
1480 0x0000000020000000ULL)))));
1488#if HWY_HAVE_SCALAR_F16_OPERATORS
1489 return lhs.native == rhs.native;
1496#if HWY_HAVE_SCALAR_F16_OPERATORS
1497 return lhs.native != rhs.native;
1503#if HWY_HAVE_SCALAR_F16_OPERATORS
1504 return lhs.native < rhs.native;
1511#if HWY_HAVE_SCALAR_F16_OPERATORS
1512 return lhs.native <= rhs.native;
1518#if HWY_HAVE_SCALAR_F16_OPERATORS
1519 return lhs.native > rhs.native;
1526#if HWY_HAVE_SCALAR_F16_OPERATORS
1527 return lhs.native >= rhs.native;
1532#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
1534 float16_t lhs, float16_t rhs)
noexcept {
1535#if HWY_HAVE_SCALAR_F16_OPERATORS
1536 return lhs.native <=> rhs.native;
1538 return F32FromF16(lhs) <=> F32FromF16(rhs);
1553#if HWY_ARCH_ARM_A64 && \
1554 (HWY_COMPILER_CLANG >= 1700 || HWY_COMPILER_GCC_ACTUAL >= 1400)
1555#define HWY_ARM_HAVE_SCALAR_BF16_TYPE 1
1557#define HWY_ARM_HAVE_SCALAR_BF16_TYPE 0
1561#ifndef HWY_SSE2_HAVE_SCALAR_BF16_TYPE
1562#if HWY_ARCH_X86 && defined(__SSE2__) && \
1563 ((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL) || \
1564 HWY_COMPILER_GCC_ACTUAL >= 1300)
1565#define HWY_SSE2_HAVE_SCALAR_BF16_TYPE 1
1567#define HWY_SSE2_HAVE_SCALAR_BF16_TYPE 0
1572#if HWY_ARM_HAVE_SCALAR_BF16_TYPE || HWY_SSE2_HAVE_SCALAR_BF16_TYPE
1573#define HWY_HAVE_SCALAR_BF16_TYPE 1
1575#define HWY_HAVE_SCALAR_BF16_TYPE 0
1578#ifndef HWY_HAVE_SCALAR_BF16_OPERATORS
1581#if HWY_HAVE_SCALAR_BF16_TYPE && (HWY_COMPILER_GCC_ACTUAL >= 1300)
1582#define HWY_HAVE_SCALAR_BF16_OPERATORS 1
1584#define HWY_HAVE_SCALAR_BF16_OPERATORS 0
1588#if HWY_HAVE_SCALAR_BF16_OPERATORS
1589#define HWY_BF16_CONSTEXPR constexpr
1591#define HWY_BF16_CONSTEXPR HWY_BITCASTSCALAR_CONSTEXPR
1595#if HWY_HAVE_SCALAR_BF16_TYPE
1596 using Native = __bf16;
1600#if HWY_HAVE_SCALAR_BF16_TYPE
1617#if HWY_HAVE_SCALAR_BF16_TYPE
1618 constexpr bfloat16_t(Native arg) noexcept : native(arg) {}
1619 constexpr operator Native() const noexcept {
return native; }
1622#if HWY_HAVE_SCALAR_BF16_TYPE
1624 return bfloat16_t(BitCastScalar<Native>(bits));
1642#if HWY_HAVE_SCALAR_BF16_OPERATORS || HWY_IDE
1643 template <
typename T, hwy::EnableIf<!IsSame<RemoveCvRef<T>, Native>() &&
1644 !IsSame<RemoveCvRef<T>, b
float16_t>() &&
1645 IsConvertible<T, Native>()>* =
nullptr>
1647 noexcept(
static_cast<Native
>(DeclVal<T>())))
1648 : native(static_cast<Native>(static_cast<T&&>(arg))) {}
1650 template <
typename T, hwy::EnableIf<!IsSame<RemoveCvRef<T>, Native>() &&
1651 !IsSame<RemoveCvRef<T>, b
float16_t>() &&
1652 !IsConvertible<T, Native>() &&
1653 IsStaticCastable<T, Native>()>* =
nullptr>
1654 explicit constexpr bfloat16_t(T&& arg)
noexcept(
1655 noexcept(
static_cast<Native
>(DeclVal<T>())))
1656 : native(static_cast<Native>(static_cast<T&&>(arg))) {}
1665 native =
static_cast<Native
>(native - Native{1});
1671 bfloat16_t result = *
this;
1672 native =
static_cast<Native
>(native - Native{1});
1678 native =
static_cast<Native
>(native + Native{1});
1684 bfloat16_t result = *
this;
1685 native =
static_cast<Native
>(native + Native{1});
1689 constexpr bfloat16_t
operator-() const noexcept {
1690 return bfloat16_t(
static_cast<Native
>(-native));
1692 constexpr bfloat16_t
operator+() const noexcept {
return *
this; }
1696#define HWY_BFLOAT16_BINARY_OP(op, op_func, assign_func) \
1697 constexpr bfloat16_t op_func(const bfloat16_t& rhs) const noexcept { \
1698 return bfloat16_t(static_cast<Native>(native op rhs.native)); \
1700 template <typename T, HWY_IF_NOT_BF16(T), \
1701 typename UnwrappedT = \
1702 detail::SpecialFloatUnwrapArithOpOperand<const T&>, \
1703 typename RawResultT = \
1704 decltype(DeclVal<Native>() op DeclVal<UnwrappedT>()), \
1705 typename ResultT = \
1706 detail::NativeSpecialFloatToWrapper<RawResultT>, \
1707 HWY_IF_CASTABLE(RawResultT, ResultT)> \
1708 constexpr ResultT op_func(const T& rhs) const noexcept(noexcept( \
1709 static_cast<ResultT>(DeclVal<Native>() op DeclVal<UnwrappedT>()))) { \
1710 return static_cast<ResultT>(native op static_cast<UnwrappedT>(rhs)); \
1712 HWY_CXX14_CONSTEXPR hwy::bfloat16_t& assign_func( \
1713 const hwy::bfloat16_t& rhs) noexcept { \
1714 native = static_cast<Native>(native op rhs.native); \
1717 template <typename T, HWY_IF_NOT_BF16(T), \
1718 HWY_IF_OP_CASTABLE(op, const T&, Native), \
1719 HWY_IF_ASSIGNABLE( \
1720 Native, decltype(DeclVal<Native>() op DeclVal<const T&>()))> \
1721 HWY_CXX14_CONSTEXPR hwy::bfloat16_t& assign_func(const T& rhs) noexcept( \
1723 static_cast<Native>(DeclVal<Native>() op DeclVal<const T&>()))) { \
1724 native = static_cast<Native>(native op rhs); \
1727 HWY_BFLOAT16_BINARY_OP(+,
operator+,
operator+=)
1728 HWY_BFLOAT16_BINARY_OP(-, operator-, operator-=)
1729 HWY_BFLOAT16_BINARY_OP(*, operator*, operator*=)
1730 HWY_BFLOAT16_BINARY_OP(/, operator/, operator/=)
1731#undef HWY_BFLOAT16_BINARY_OP
1735static_assert(
sizeof(
hwy::bfloat16_t) == 2,
"Wrong size of bfloat16_t");
1739#if HWY_HAVE_SCALAR_BF16_TYPE
1742#if HWY_HAVE_SCALAR_BF16_OPERATORS
1744struct SpecialFloatUnwrapArithOpOperandT<T,
hwy::bfloat16_t, true> {
1745 using type = hwy::bfloat16_t::Native;
1750struct NativeSpecialFloatToWrapperT<T,
hwy::bfloat16_t::Native> {
1757#if HWY_HAS_BUILTIN(__builtin_bit_cast) || HWY_COMPILER_MSVC >= 1926
1761struct BitCastScalarSrcCastHelper<
hwy::bfloat16_t> {
1762#if HWY_HAVE_SCALAR_BF16_TYPE
1763 static HWY_INLINE constexpr const hwy::bfloat16_t::Native& CastSrcValRef(
1768 static HWY_INLINE constexpr const uint16_t& CastSrcValRef(
1779#if HWY_HAVE_SCALAR_BF16_OPERATORS
1780 return static_cast<float>(bf);
1782 return BitCastScalar<float>(
static_cast<uint32_t
>(
1783 static_cast<uint32_t
>(BitCastScalar<uint16_t>(bf)) << 16));
1792 const uint32_t f32_bits) {
1793 return static_cast<uint32_t
>(((f32_bits & 0x7FFFFFFFu) < 0x7F800000u)
1794 ? (0x7FFFu + ((f32_bits >> 16) & 1u))
1801 const uint32_t f32_bits) {
1810 return static_cast<uint16_t
>(
1812 (
static_cast<uint32_t
>((f32_bits & 0x7FFFFFFFu) > 0x7F800000u) << 6));
1818#if HWY_HAVE_SCALAR_BF16_OPERATORS
1821 return bfloat16_t::FromBits(
1822 detail::F32BitsToBF16Bits(BitCastScalar<uint32_t>(f)));
1827#if HWY_HAVE_SCALAR_BF16_OPERATORS
1861 static_cast<float>(BitCastScalar<double>(
static_cast<uint64_t
>(
1862 (BitCastScalar<uint64_t>(f64) & 0xFFFFFFC000000000ULL) |
1863 ((BitCastScalar<uint64_t>(f64) + 0x0000003FFFFFFFFFULL) &
1864 0x0000004000000000ULL)))));
1873#if HWY_HAVE_SCALAR_BF16_OPERATORS
1874 return lhs.native == rhs.native;
1882#if HWY_HAVE_SCALAR_BF16_OPERATORS
1883 return lhs.native != rhs.native;
1890#if HWY_HAVE_SCALAR_BF16_OPERATORS
1891 return lhs.native < rhs.native;
1898#if HWY_HAVE_SCALAR_BF16_OPERATORS
1899 return lhs.native <= rhs.native;
1906#if HWY_HAVE_SCALAR_BF16_OPERATORS
1907 return lhs.native > rhs.native;
1914#if HWY_HAVE_SCALAR_BF16_OPERATORS
1915 return lhs.native >= rhs.native;
1920#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
1922 bfloat16_t lhs, bfloat16_t rhs)
noexcept {
1923#if HWY_HAVE_SCALAR_BF16_OPERATORS
1924 return lhs.native <=> rhs.native;
1926 return F32FromBF16(lhs) <=> F32FromBF16(rhs);
1936template <
typename T>
1943 enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
1950 enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
1959 enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
1968 enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
1977 enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
1986 enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
1995 enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
2003 enum { is_signed = 1, is_float = 0, is_bf16 = 0 };
2009 enum { is_signed = 0, is_float = 0, is_bf16 = 0 };
2017 enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
2024 enum { is_signed = 1, is_float = 1, is_bf16 = 1 };
2033 enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
2041 enum { is_signed = 1, is_float = 1, is_bf16 = 0 };
2077template <
typename T>
2079template <
typename T>
2081template <
typename T>
2085template <
typename T>
2087template <
typename T>
2104template <
typename T,
class R = detail::Relations<T>>
2106 ->
hwy::SizeTag<((R::is_signed + R::is_float + R::is_bf16) << 8)> {
2113template <
typename T,
class R = detail::Relations<T>>
2121template <
typename T>
2123 return IsSameEither<RemoveCvRef<T>, float,
double>();
2126template <
typename T>
2130 return IsSame<RemoveCvRef<T>,
float16_t>() || IsFloat3264<T>();
2133template <
typename T>
2135 return static_cast<T
>(0) >
static_cast<T
>(-1);
2138constexpr bool IsSigned<float16_t>() {
2142constexpr bool IsSigned<bfloat16_t>() {
2146constexpr bool IsSigned<hwy::uint128_t>() {
2150constexpr bool IsSigned<hwy::K64V64>() {
2154constexpr bool IsSigned<hwy::K32V32>() {
2158template <typename T, bool = IsInteger<T>() && !IsIntegerLaneType<T>()>
2163template <
typename T>
2169template <
typename T>
2173template <
typename T>
2175 static_assert(IsInteger<T>(),
"Only for integer types");
2177 return static_cast<T
>(IsSigned<T>() ? (
static_cast<TU
>(~TU(0)) >> 1)
2178 :
static_cast<TU
>(~TU(0)));
2180template <
typename T>
2182 static_assert(IsInteger<T>(),
"Only for integer types");
2183 return IsSigned<T>() ?
static_cast<T
>(-1) - LimitsMax<T>()
2184 :
static_cast<T
>(0);
2190template <
typename T>
2192 return LimitsMin<T>();
2196 return bfloat16_t::FromBits(uint16_t{0xFF7Fu});
2200 return float16_t::FromBits(uint16_t{0xFBFFu});
2204 return -3.402823466e+38F;
2208 return -1.7976931348623158e+308;
2211template <
typename T>
2213 return LimitsMax<T>();
2217 return bfloat16_t::FromBits(uint16_t{0x7F7Fu});
2221 return float16_t::FromBits(uint16_t{0x7BFFu});
2225 return 3.402823466e+38F;
2229 return 1.7976931348623158e+308;
2234template <
typename T>
2240 return bfloat16_t::FromBits(uint16_t{0x3C00u});
2244 return float16_t::FromBits(uint16_t{0x1400u});
2248 return 1.192092896e-7f;
2252 return 2.2204460492503131e-16;
2256template <
typename T>
2258 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
2262constexpr int MantissaBits<bfloat16_t>() {
2266constexpr int MantissaBits<float16_t>() {
2270constexpr int MantissaBits<float>() {
2274constexpr int MantissaBits<double>() {
2280template <
typename T>
2286template <
typename T>
2292template <
typename T>
2299template <
typename T>
2306template <
typename T>
2308 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
2313 return bfloat16_t::FromBits(uint16_t{0x4300u});
2317 return float16_t::FromBits(uint16_t{0x6400u});
2326 return 4503599627370496.0;
2330template <
typename T>
2333 return 8 *
sizeof(T) - 1 - MantissaBits<T>();
2339template <
typename T>
2347#if HWY_HAVE_SCALAR_F16_OPERATORS || HWY_HAVE_SCALAR_BF16_OPERATORS
2349#define HWY_RHS_SPECIAL_FLOAT_ARITH_OP(op, op_func, T2) \
2352 hwy::EnableIf<hwy::IsInteger<RemoveCvRef<T1>>() || \
2353 hwy::IsFloat3264<RemoveCvRef<T1>>()>* = nullptr, \
2354 typename RawResultT = decltype(DeclVal<T1>() op DeclVal<T2::Native>()), \
2355 typename ResultT = detail::NativeSpecialFloatToWrapper<RawResultT>, \
2356 HWY_IF_CASTABLE(RawResultT, ResultT)> \
2357 static HWY_INLINE constexpr ResultT op_func(T1 a, T2 b) noexcept { \
2358 return static_cast<ResultT>(a op b.native); \
2361#define HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(op, op_func, T1) \
2362 HWY_RHS_SPECIAL_FLOAT_ARITH_OP(op, op_func, T1) \
2365 hwy::EnableIf<hwy::IsInteger<RemoveCvRef<T2>>() || \
2366 hwy::IsFloat3264<RemoveCvRef<T2>>()>* = nullptr, \
2367 typename RawResultT = decltype(DeclVal<T1::Native>() op DeclVal<T2>()), \
2368 typename ResultT = detail::NativeSpecialFloatToWrapper<RawResultT>, \
2369 HWY_IF_CASTABLE(RawResultT, ResultT)> \
2370 static HWY_INLINE constexpr ResultT op_func(T1 a, T2 b) noexcept { \
2371 return static_cast<ResultT>(a.native op b); \
2374#if HWY_HAVE_SCALAR_F16_OPERATORS
2375HWY_RHS_SPECIAL_FLOAT_ARITH_OP(+,
operator+, float16_t)
2376HWY_RHS_SPECIAL_FLOAT_ARITH_OP(-,
operator-, float16_t)
2377HWY_RHS_SPECIAL_FLOAT_ARITH_OP(*,
operator*, float16_t)
2378HWY_RHS_SPECIAL_FLOAT_ARITH_OP(/,
operator/, float16_t)
2379HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(==,
operator==, float16_t)
2380HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(!=,
operator!=, float16_t)
2381HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<,
operator<, float16_t)
2382HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=,
operator<=, float16_t)
2383HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>,
operator>, float16_t)
2384HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>=,
operator>=, float16_t)
2385#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
2386HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=>, operator<=>, float16_t)
2390#if HWY_HAVE_SCALAR_BF16_OPERATORS
2391HWY_RHS_SPECIAL_FLOAT_ARITH_OP(+,
operator+, bfloat16_t)
2392HWY_RHS_SPECIAL_FLOAT_ARITH_OP(-,
operator-, bfloat16_t)
2393HWY_RHS_SPECIAL_FLOAT_ARITH_OP(*,
operator*, bfloat16_t)
2394HWY_RHS_SPECIAL_FLOAT_ARITH_OP(/,
operator/, bfloat16_t)
2395HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(==,
operator==, bfloat16_t)
2396HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(!=,
operator!=, bfloat16_t)
2397HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<,
operator<, bfloat16_t)
2398HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=,
operator<=, bfloat16_t)
2399HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>,
operator>, bfloat16_t)
2400HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(>=,
operator>=, bfloat16_t)
2401#if HWY_HAVE_CXX20_THREE_WAY_COMPARE
2402HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP(<=>, operator<=>, bfloat16_t)
2406#undef HWY_RHS_SPECIAL_FLOAT_ARITH_OP
2407#undef HWY_SPECIAL_FLOAT_CMP_AGAINST_NON_SPECIAL_OP
2426#if HWY_HAVE_SCALAR_F16_OPERATORS
2427#define HWY_BF16_TO_F16_CONSTEXPR HWY_BF16_CONSTEXPR
2429#define HWY_BF16_TO_F16_CONSTEXPR HWY_F16_CONSTEXPR
2436 return static_cast<TTo
>(in);
2438template <
typename TTo,
typename TFrom,
HWY_IF_F16(TTo),
2440HWY_API constexpr TTo ConvertScalarTo(
const TFrom in) {
2441 return F16FromF32(
static_cast<float>(in));
2443template <
typename TTo, HWY_IF_F16(TTo)>
2448template <
typename TTo, HWY_IF_F16(TTo)>
2452template <
typename TTo,
typename TFrom,
HWY_IF_BF16(TTo),
2457template <
typename TTo, HWY_IF_BF16(TTo)>
2461template <
typename TTo, HWY_IF_BF16(TTo)>
2465template <
typename TTo,
typename TFrom,
HWY_IF_F16(TFrom),
2470template <
typename TTo,
typename TFrom,
HWY_IF_BF16(TFrom),
2476template <
typename TTo>
2484template <
typename T1,
typename T2>
2486 return (a + b - 1) / b;
2490constexpr inline size_t RoundUpTo(
size_t what,
size_t align) {
2491 return DivCeil(what, align) * align;
2496 return what - (what % align);
2506 return static_cast<T
>(val >> shift_amt);
2514 return static_cast<T
>(
2515 (val < 0) ? static_cast<TU>(
2516 ~(
static_cast<TU
>(~
static_cast<TU
>(val)) >> shift_amt))
2517 :
static_cast<TU
>(
static_cast<TU
>(val) >> shift_amt));
2527template <
class T, HWY_IF_INTEGER(RemoveCvRef<T>)>
2530 return detail::ScalarShr(
2532 (LimitsMin<NonCvRefT>() >> (
sizeof(T) * 8 - 1)) !=
2533 static_cast<NonCvRefT
>(-1))
2536 static_cast<NonCvRefT
>(val), shift_amt);
2542#if HWY_COMPILER_MSVC
2543 unsigned long index;
2544 _BitScanForward(&index, x);
2547 return static_cast<size_t>(__builtin_ctz(x));
2553#if HWY_COMPILER_MSVC
2555 unsigned long index;
2556 _BitScanForward64(&index, x);
2560 uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
2561 unsigned long index;
2563 uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
2564 _BitScanForward(&index, msb);
2567 _BitScanForward(&index, lsb);
2572 return static_cast<size_t>(__builtin_ctzll(x));
2579#if HWY_COMPILER_MSVC
2580 unsigned long index;
2581 _BitScanReverse(&index, x);
2584 return static_cast<size_t>(__builtin_clz(x));
2590#if HWY_COMPILER_MSVC
2592 unsigned long index;
2593 _BitScanReverse64(&index, x);
2597 const uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
2598 unsigned long index;
2600 const uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
2601 _BitScanReverse(&index, lsb);
2604 _BitScanReverse(&index, msb);
2609 return static_cast<size_t>(__builtin_clzll(x));
2613template <
class T, HWY_IF_INTEGER(RemoveCvRef<T>),
2614 HWY_IF_T_SIZE_ONE_OF(RemoveCvRef<T>, (1 << 1) | (1 << 2) | (1 << 4))>
2615HWY_API
size_t PopCount(T x) {
2616 u
int32_t u32_x = static_cast<u
int32_t>(
2617 static_cast<Un
signedFromSize<sizeof(RemoveCvRef<T>)>>(x));
2619#if HWY_COMPILER_GCC || HWY_COMPILER_CLANG
2620 return static_cast<
size_t>(__builtin_popcountl(u32_x));
2621#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
2622 return static_cast<
size_t>(_mm_popcnt_u32(u32_x));
2624 u32_x -= ((u32_x >> 1) & 0x55555555u);
2625 u32_x = (((u32_x >> 2) & 0x33333333u) + (u32_x & 0x33333333u));
2626 u32_x = (((u32_x >> 4) + u32_x) & 0x0F0F0F0Fu);
2627 u32_x += (u32_x >> 8);
2628 u32_x += (u32_x >> 16);
2629 return static_cast<size_t>(u32_x & 0x3Fu);
2633template <
class T, HWY_IF_INTEGER(RemoveCvRef<T>),
2634 HWY_IF_T_SIZE(RemoveCvRef<T>, 8)>
2636 uint64_t u64_x =
static_cast<uint64_t
>(
2637 static_cast<UnsignedFromSize<sizeof(RemoveCvRef<T>)
>>(x));
2639#if HWY_COMPILER_GCC || HWY_COMPILER_CLANG
2640 return static_cast<size_t>(__builtin_popcountll(u64_x));
2641#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 && defined(__AVX__)
2642 return _mm_popcnt_u64(u64_x);
2643#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
2644 return _mm_popcnt_u32(
static_cast<uint32_t
>(u64_x & 0xFFFFFFFFu)) +
2645 _mm_popcnt_u32(
static_cast<uint32_t
>(u64_x >> 32));
2647 u64_x -= ((u64_x >> 1) & 0x5555555555555555ULL);
2648 u64_x = (((u64_x >> 2) & 0x3333333333333333ULL) +
2649 (u64_x & 0x3333333333333333ULL));
2650 u64_x = (((u64_x >> 4) + u64_x) & 0x0F0F0F0F0F0F0F0FULL);
2651 u64_x += (u64_x >> 8);
2652 u64_x += (u64_x >> 16);
2653 u64_x += (u64_x >> 32);
2654 return static_cast<size_t>(u64_x & 0x7Fu);
2661template <
typename TI>
2665 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x >> 1)) + 1);
2668template <
typename TI>
2672 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x - 1)) + 1);
2675template <
typename T,
typename T2, HWY_IF_FLOAT(T), HWY_IF_NOT_SPECIAL_FLOAT(T)>
2677 return t +
static_cast<T
>(increment);
2680template <
typename T,
typename T2, HWY_IF_SPECIAL_FLOAT(T)>
2682 return ConvertScalarTo<T>(ConvertScalarTo<float>(t) +
2683 ConvertScalarTo<float>(increment));
2686template <
typename T,
typename T2, HWY_IF_NOT_FLOAT(T)>
2687HWY_INLINE constexpr T AddWithWraparound(T t, T2 n) {
2688 using TU = MakeUnsigned<T>;
2693 return static_cast<T
>(
static_cast<TU
>(
2694 static_cast<unsigned long long>(
static_cast<unsigned long long>(t) +
2695 static_cast<unsigned long long>(n)) &
2696 uint64_t{hwy::LimitsMax<TU>()}));
2699#if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
2700#pragma intrinsic(_mul128)
2701#pragma intrinsic(_umul128)
2706#if defined(__SIZEOF_INT128__)
2707 __uint128_t product = (__uint128_t)a * (__uint128_t)b;
2708 *upper = (uint64_t)(product >> 64);
2709 return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
2710#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
2711 return _umul128(a, b, upper);
2713 constexpr uint64_t kLo32 = 0xFFFFFFFFU;
2714 const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
2715 const uint64_t hi_lo = (a >> 32) * (b & kLo32);
2716 const uint64_t lo_hi = (a & kLo32) * (b >> 32);
2717 const uint64_t hi_hi = (a >> 32) * (b >> 32);
2718 const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
2719 *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
2720 return (t << 32) | (lo_lo & kLo32);
2725#if defined(__SIZEOF_INT128__)
2726 __int128_t product = (__int128_t)a * (__int128_t)b;
2727 *upper = (int64_t)(product >> 64);
2728 return (int64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
2729#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
2730 return _mul128(a, b, upper);
2732 uint64_t unsigned_upper;
2733 const int64_t lower =
static_cast<int64_t
>(
Mul128(
2734 static_cast<uint64_t
>(a),
static_cast<uint64_t
>(b), &unsigned_upper));
2735 *upper =
static_cast<int64_t
>(
2737 (
static_cast<uint64_t
>(ScalarShr(a, 63)) &
static_cast<uint64_t
>(b)) -
2738 (
static_cast<uint64_t
>(ScalarShr(b, 63)) &
static_cast<uint64_t
>(a)));
2747 explicit Divisor(uint32_t divisor) : divisor_(divisor) {
2748 if (divisor <= 1)
return;
2750 const uint32_t len =
2752 const uint64_t u_hi = (2ULL << len) - divisor;
2753 const uint32_t q = Truncate((u_hi << 32) / divisor);
2764 const uint64_t mul = mul_;
2765 const uint32_t t = Truncate((mul * n) >> 32);
2766 return (t + ((n - t) >> shift1_)) >> shift2_;
2770 uint32_t
Remainder(uint32_t n)
const {
return n - (Divide(n) * divisor_); }
2774 return static_cast<uint32_t
>(x & 0xFFFFFFFFu);
2779 uint32_t shift1_ = 0;
2780 uint32_t shift2_ = 0;
2785template <
typename T>
2789 return BitCastScalar<T>(
2790 static_cast<TU
>(BitCastScalar<TU>(val) & (~SignMask<T>())));
2793template <
typename T>
2799template <
typename T>
2803 return (val < T{0}) ?
static_cast<T
>(TU{0} -
static_cast<TU
>(val)) : val;
2806template <
typename T>
2814template <
typename T>
2818 return detail::ScalarAbs(hwy::TypeTag<TVal>(),
static_cast<TVal
>(val));
2821template <
typename T>
2825 return (BitCastScalar<TU>(ScalarAbs(val)) > ExponentMask<TF>());
2828template <
typename T>
2832 return static_cast<TU
>(BitCastScalar<TU>(
static_cast<TF
>(val)) << 1) ==
2833 static_cast<TU
>(MaxExponentTimes2<TF>());
2838template <
typename T>
2842 return (BitCastScalar<TU>(
hwy::ScalarAbs(val)) < ExponentMask<T>());
2845template <
typename T>
2854template <
typename T>
2858 return detail::ScalarIsFinite(hwy::IsFloatTag<TVal>(),
2859 static_cast<TVal
>(val));
2862template <
typename T>
2867 return BitCastScalar<TF>(
static_cast<TU
>(
2868 (BitCastScalar<TU>(
static_cast<TF
>(magn)) & (~SignMask<TF>())) |
2869 (BitCastScalar<TU>(
static_cast<TF
>(sign)) & SignMask<TF>())));
2872template <
typename T>
2877 return ((BitCastScalar<TU>(
static_cast<TVal
>(val)) & SignMask<TVal>()) != 0);
2881#if HWY_ARCH_PPC && (HWY_COMPILER_GCC || HWY_COMPILER_CLANG) && \
2882 !defined(_SOFT_FLOAT)
2884template <
class T, HWY_IF_F32(T)>
2885HWY_API void PreventElision(T&& output) {
2886 asm volatile(
"" :
"+f"(output)::
"memory");
2888template <
class T, HWY_IF_F64(T)>
2890 asm volatile(
"" :
"+d"(output)::
"memory");
2892template <
class T, HWY_IF_NOT_FLOAT3264(T)>
2894 asm volatile(
"" :
"+r"(output)::
"memory");
2899#if HWY_COMPILER_MSVC
2904 static std::atomic<RemoveCvRef<T>> sink;
2905 sink.store(output, std::memory_order_relaxed);
2910 asm volatile(
"" :
"+r"(output) : :
"memory");
#define HWY_MAX(a, b)
Definition base.h:177
#define HWY_RESTRICT
Definition base.h:95
#define HWY_DIAGNOSTICS(tokens)
Definition base.h:109
#define HWY_BITCASTSCALAR_CONSTEXPR
Definition base.h:954
#define HWY_NORETURN
Definition base.h:105
#define HWY_FORMAT(idx_fmt, idx_arg)
Definition base.h:128
#define HWY_API
Definition base.h:171
#define HWY_MIN(a, b)
Definition base.h:176
#define HWY_BF16_CONSTEXPR
Definition base.h:1591
#define HWY_CXX14_CONSTEXPR
Definition base.h:304
#define HWY_INLINE
Definition base.h:101
#define HWY_DIAGNOSTICS_OFF(msc, gcc)
Definition base.h:110
#define HWY_DASSERT(condition)
Definition base.h:290
#define HWY_MAYBE_UNUSED
Definition base.h:113
#define HWY_IF_BF16(T)
Definition base.h:673
#define HWY_F16_CONSTEXPR
Definition base.h:1301
#define HWY_ASSUME_ALIGNED(ptr, align)
Definition base.h:139
#define HWY_BF16_TO_F16_CONSTEXPR
Definition base.h:2429
#define HWY_IF_NOT_SPECIAL_FLOAT(T)
Definition base.h:631
#define HWY_IF_F16(T)
Definition base.h:676
#define HWY_IF_NOT_SAME(T, expected)
Definition base.h:654
#define HWY_F16_FROM_F32_DASSERT(condition)
Definition base.h:1370
@ kDisableDeclValEvaluation
Definition base.h:706
decltype(TryAddRValRef< T >(0)) type
Definition base.h:705
static U TryAddRValRef(Arg)
static URef TryAddRValRef(int)
uint32_t divisor_
Definition base.h:2777
Divisor(uint32_t divisor)
Definition base.h:2747
uint32_t Divide(uint32_t n) const
Definition base.h:2763
static uint32_t Truncate(uint64_t x)
Definition base.h:2773
uint32_t GetDivisor() const
Definition base.h:2760
uint32_t Remainder(uint32_t n) const
Definition base.h:2770
static hwy::SizeTag< 1 > TryAssignTest(int)
static hwy::SizeTag< 0 > TryAssignTest(Arg)
static decltype(IsConvertibleT< T, U >::template TestFuncWithToArg< U >(DeclVal< T >())) TryConvTest(int)
static hwy::SizeTag< 0 > TryConvTest(Arg)
static hwy::SizeTag< 1 > TryStaticCastTest(int)
static hwy::SizeTag< 0 > TryStaticCastTest(Arg)
#define HWY_DLLEXPORT
Definition highway_export.h:13
HWY_API Vec128< T, N > operator+(Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:605
HWY_API Vec128< T, N > operator-(Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:601
typename SpecialFloatUnwrapArithOpOperandT< T >::type SpecialFloatUnwrapArithOpOperand
Definition base.h:1099
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T ScalarAbs(hwy::FloatTag, T val)
Definition base.h:2786
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint16_t F32BitsToBF16Bits(const uint32_t f32_bits)
Definition base.h:1800
static HWY_INLINE constexpr T ScalarShr(hwy::UnsignedTag, T val, int shift_amt)
Definition base.h:2504
static HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsFinite(hwy::FloatTag, T val)
Definition base.h:2839
static HWY_INLINE HWY_MAYBE_UNUSED constexpr uint32_t F32BitsToBF16RoundIncr(const uint32_t f32_bits)
Definition base.h:1791
typename NativeSpecialFloatToWrapperT< T >::type NativeSpecialFloatToWrapper
Definition base.h:1108
double float64_t
Definition base.h:406
HWY_API HWY_F16_CONSTEXPR float F32FromF16(float16_t f16)
Definition base.h:1304
HWY_API void CopyBytes(const From *from, To *to)
Definition base.h:327
HWY_API float F32FromF16Mem(const void *ptr)
Definition base.h:2414
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition base.h:2551
typename MakeLaneTypeIfIntegerT< T >::type MakeLaneTypeIfInteger
Definition base.h:2170
constexpr MakeSigned< T > MaxExponentTimes2()
Definition base.h:2281
constexpr MakeUnsigned< T > MantissaMask()
Definition base.h:2300
typename RemoveConstT< T >::type RemoveConst
Definition base.h:547
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T LowestValue()
Definition base.h:2191
HWY_API void ZeroBytes(To *to)
Definition base.h:352
HWY_API void PreventElision(T &&output)
Definition base.h:2898
HWY_API HWY_BITCASTSCALAR_CONSTEXPR RemoveCvRef< T > ScalarCopySign(T magn, T sign)
Definition base.h:2863
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition base.h:2705
HWY_API constexpr T LimitsMin()
Definition base.h:2181
RemoveConst< RemoveVolatile< RemoveRef< T > > > RemoveCvRef
Definition base.h:578
constexpr bool operator!=(const AlignedAllocator< T > &, const AlignedAllocator< V > &) noexcept
Definition aligned_allocator.h:166
HWY_API constexpr bool IsFloat3264()
Definition base.h:2122
HWY_API HWY_BITCASTSCALAR_CONSTEXPR To BitCastScalar(const From &val)
Definition base.h:1024
HWY_INLINE constexpr T AddWithWraparound(T t, T2 increment)
Definition base.h:2676
HWY_API HWY_BF16_CONSTEXPR float F32FromBF16(bfloat16_t bf)
Definition base.h:1778
constexpr T1 DivCeil(T1 a, T2 b)
Definition base.h:2485
HWY_API constexpr TTo ConvertScalarTo(const TFrom in)
Definition base.h:2435
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition base.h:2078
constexpr bool operator==(const AlignedAllocator< T > &, const AlignedAllocator< V > &) noexcept
Definition aligned_allocator.h:160
HWY_API constexpr bool IsSame()
Definition base.h:499
typename RemoveVolatileT< T >::type RemoveVolatile
Definition base.h:559
HWY_API constexpr bool IsConst()
Definition base.h:533
HWY_API constexpr bool IsSigned()
Definition base.h:2134
HWY_API void CopySameSize(const From *HWY_RESTRICT from, To *HWY_RESTRICT to)
Definition base.h:346
constexpr size_t FloorLog2(TI x)
Definition base.h:2662
typename RemovePtrT< T >::type RemovePtr
Definition base.h:602
constexpr MakeUnsigned< T > ExponentMask()
Definition base.h:2293
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarSignBit(T val)
Definition base.h:2873
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsNaN(T val)
Definition base.h:2822
typename detail::Relations< T >::Float MakeFloat
Definition base.h:2082
HWY_API HWY_BITCASTSCALAR_CONSTEXPR bool ScalarIsInf(T val)
Definition base.h:2829
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x)
Definition base.h:2577
typename IfT< Condition, Then, Else >::type If
Definition base.h:520
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition base.h:2092
HWY_API constexpr bool IsIntegerLaneType()
Definition base.h:840
constexpr auto IsFloatTag() -> hwy::SizeTag<(R::is_float ? 0x200 :0x400)>
Definition base.h:2114
HWY_API constexpr bool IsConvertible()
Definition base.h:774
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T Epsilon()
Definition base.h:2235
HWY_API HWY_F16_CONSTEXPR float16_t F16FromF64(double f64)
Definition base.h:1443
HWY_API DeclValT< T >::type DeclVal() noexcept
Definition base.h:714
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition base.h:2094
static HWY_MAYBE_UNUSED bool operator>(const uint128_t &a, const uint128_t &b)
Definition base.h:438
float float32_t
Definition base.h:405
constexpr int MantissaBits()
Definition base.h:2257
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition base.h:2540
HWY_API HWY_BF16_CONSTEXPR bfloat16_t BF16FromF64(double f64)
Definition base.h:1826
HWY_API HWY_BF16_CONSTEXPR bfloat16_t BF16FromF32(float f)
Definition base.h:1817
constexpr MakeSigned< T > MaxExponentField()
Definition base.h:2340
HWY_API constexpr bool IsSameEither()
Definition base.h:505
HWY_INLINE HWY_BITCASTSCALAR_CONSTEXPR T MantissaEnd()
Definition base.h:2307
HWY_F16_CONSTEXPR bool operator>=(float16_t lhs, float16_t rhs) noexcept
Definition base.h:1524
static constexpr bool IsArray()
Definition base.h:735
HWY_API constexpr bool IsInteger()
Definition base.h:877
static constexpr bool IsStaticCastable()
Definition base.h:794
HWY_API constexpr bool IsSpecialFloat()
Definition base.h:832
constexpr size_t CeilLog2(TI x)
Definition base.h:2669
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x)
Definition base.h:2588
HWY_API float F32FromBF16Mem(const void *ptr)
Definition base.h:2420
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition base.h:2096
constexpr MakeUnsigned< T > SignMask()
Definition base.h:2287
static HWY_MAYBE_UNUSED bool operator<(const uint128_t &a, const uint128_t &b)
Definition base.h:433
HWY_API HWY_F16_CONSTEXPR float16_t F16FromF32(float f32)
Definition base.h:1374
typename EnableIfT< Condition >::type EnableIf
Definition base.h:486
constexpr auto TypeTag() -> hwy::SizeTag<((R::is_signed+R::is_float+R::is_bf16)<< 8)>
Definition base.h:2105
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition base.h:378
typename detail::Relations< T >::Narrow MakeNarrow
Definition base.h:2088
HWY_API HWY_BITCASTSCALAR_CONSTEXPR RemoveCvRef< T > ScalarAbs(T val)
Definition base.h:2815
typename RemoveRefT< T >::type RemoveRef
Definition base.h:575
HWY_API constexpr bool IsFloat()
Definition base.h:2127
static constexpr bool IsAssignable()
Definition base.h:820
constexpr size_t RoundDownTo(size_t what, size_t align)
Definition base.h:2495
HWY_DLLEXPORT HWY_NORETURN void int const char * format
Definition base.h:231
HWY_DLLEXPORT HWY_NORETURN void int line
Definition base.h:231
HWY_API constexpr T LimitsMax()
Definition base.h:2174
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition base.h:2490
HWY_F16_CONSTEXPR bool operator<=(float16_t lhs, float16_t rhs) noexcept
Definition base.h:1509
typename detail::Relations< T >::Wide MakeWide
Definition base.h:2086
constexpr int ExponentBits()
Definition base.h:2331
typename detail::Relations< T >::Signed MakeSigned
Definition base.h:2080
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T HighestValue()
Definition base.h:2212
void type
Definition base.h:482
Else type
Definition base.h:516
Then type
Definition base.h:511
@ value
Definition base.h:721
@ value
Definition base.h:524
@ value
Definition base.h:490
uint32_t value
Definition base.h:427
uint32_t key
Definition base.h:428
uint64_t value
Definition base.h:420
uint64_t key
Definition base.h:421
hwy::If< IsSigned< T >(), SignedFromSize< sizeof(T)>, UnsignedFromSize< sizeof(T)> > type
Definition base.h:2165
T type
Definition base.h:2160
T type
Definition base.h:543
T type
Definition base.h:539
T type
Definition base.h:586
T type
Definition base.h:590
T type
Definition base.h:598
T type
Definition base.h:594
T type
Definition base.h:582
T type
Definition base.h:567
T type
Definition base.h:571
T type
Definition base.h:563
T type
Definition base.h:555
T type
Definition base.h:551
constexpr bfloat16_t(BF16FromU16BitsTag, uint16_t u16_bits)
Definition base.h:1630
bfloat16_t() noexcept=default
uint16_t bits
Definition base.h:1606
static constexpr bfloat16_t FromBits(uint16_t bits)
Definition base.h:1634
T type
Definition base.h:1104
int16_t Signed
Definition base.h:2022
float Wide
Definition base.h:2023
uint16_t Unsigned
Definition base.h:2021
double Float
Definition base.h:2039
uint64_t Unsigned
Definition base.h:2037
int64_t Signed
Definition base.h:2038
float Narrow
Definition base.h:2040
int16_t Signed
Definition base.h:2014
float Wide
Definition base.h:2016
uint16_t Unsigned
Definition base.h:2013
uint32_t Unsigned
Definition base.h:2028
double Wide
Definition base.h:2031
float Float
Definition base.h:2030
int32_t Signed
Definition base.h:2029
uint16_t Unsigned
Definition base.h:1963
int16_t Signed
Definition base.h:1964
int32_t Wide
Definition base.h:1966
int8_t Narrow
Definition base.h:1967
uint32_t Unsigned
Definition base.h:1981
int64_t Wide
Definition base.h:1984
float Float
Definition base.h:1983
int16_t Narrow
Definition base.h:1985
int32_t Signed
Definition base.h:1982
int32_t Narrow
Definition base.h:2002
double Float
Definition base.h:2001
uint64_t Unsigned
Definition base.h:1999
int64_t Signed
Definition base.h:2000
int16_t Wide
Definition base.h:1949
int8_t Signed
Definition base.h:1948
uint8_t Unsigned
Definition base.h:1947
uint64_t Narrow
Definition base.h:2008
uint8_t Narrow
Definition base.h:1958
int16_t Signed
Definition base.h:1955
uint32_t Wide
Definition base.h:1957
uint16_t Unsigned
Definition base.h:1954
uint32_t Unsigned
Definition base.h:1972
uint64_t Wide
Definition base.h:1975
uint16_t Narrow
Definition base.h:1976
float Float
Definition base.h:1974
int32_t Signed
Definition base.h:1973
uint32_t Narrow
Definition base.h:1994
int64_t Signed
Definition base.h:1991
uint64_t Unsigned
Definition base.h:1990
double Float
Definition base.h:1992
int8_t Signed
Definition base.h:1941
uint8_t Unsigned
Definition base.h:1940
uint16_t Wide
Definition base.h:1942
T type
Definition base.h:1095
int8_t Signed
Definition base.h:2049
uint8_t Unsigned
Definition base.h:2048
int16_t Signed
Definition base.h:2054
uint16_t Unsigned
Definition base.h:2053
int32_t Signed
Definition base.h:2060
uint32_t Unsigned
Definition base.h:2059
float Float
Definition base.h:2061
double Float
Definition base.h:2067
int64_t Signed
Definition base.h:2066
uint64_t Unsigned
Definition base.h:2065
static constexpr float16_t FromBits(uint16_t bits)
Definition base.h:1164
float16_t() noexcept=default
constexpr float16_t(F16FromU16BitsTag, uint16_t u16_bits)
Definition base.h:1160
uint16_t bits
Definition base.h:1135
uint64_t lo
Definition base.h:413
uint64_t hi
Definition base.h:414