36#include <sanitizer/msan_interface.h>
42#if defined(HIGHWAY_HWY_OPS_SHARED_TOGGLE) == defined(HWY_TARGET_TOGGLE)
44#ifdef HIGHWAY_HWY_OPS_SHARED_TOGGLE
45#undef HIGHWAY_HWY_OPS_SHARED_TOGGLE
47#define HIGHWAY_HWY_OPS_SHARED_TOGGLE
64#if HWY_COMPILER_GCC_ACTUAL && (HWY_OS_WIN || HWY_ARCH_ARM_A64)
80#if HWY_HAVE_SCALAR_F16_TYPE
81 using type = hwy::float16_t::Native;
88#if HWY_HAVE_SCALAR_BF16_TYPE
89 using type = hwy::bfloat16_t::Native;
110template <
typename T, HWY_IF_NOT_SPECIAL_FLOAT(T)>
114template <
typename T,
typename NT = NativeLaneType<RemoveConst<T>>,
117#if HWY_HAVE_SCALAR_F16_TYPE
123template <
typename T,
typename NT = NativeLaneType<RemoveConst<T>>,
126#if HWY_HAVE_SCALAR_BF16_TYPE
138template <
typename T, HWY_IF_SPECIAL_FLOAT(T)>
147 return pow2 >= 0 ? (N << pow2) : (N >> (-pow2));
154 __msan_unpoison(unaligned, count *
sizeof(T));
197template <
typename Lane,
size_t N,
int kPow2>
203 static_assert(
sizeof(Lane) <= 8,
"Lanes are up to 64-bit");
204 static_assert(IsSame<Lane, RemoveCvRef<Lane>>(),
205 "Lane must not be a reference type, const-qualified type, or "
206 "volatile-qualified type");
207 static_assert(IsIntegerLaneType<Lane>() || IsFloat<Lane>() ||
208 IsSpecialFloat<Lane>(),
209 "IsIntegerLaneType<T>(), IsFloat<T>(), or IsSpecialFloat<T>() "
213 static constexpr size_t kWhole = N & 0xFFFFF;
215 static constexpr int kFrac =
static_cast<int>(N >> 20);
219 static_assert(
kFrac == 0 ||
kWhole == 1,
"If frac, whole must be 1");
223 static_assert(kPow2 >=
HWY_MIN_POW2,
"Forgot kPow2 recursion terminator?");
253 constexpr int Pow2()
const {
return kPow2; }
260 template <
typename NewT>
263 return (
kPrivateLanes *
sizeof(
T) +
sizeof(NewT) - 1) /
sizeof(NewT);
267 template <
typename NewT>
270 ((
sizeof(NewT) >=
sizeof(
T))
271 ?
static_cast<int>(
CeilLog2(
sizeof(NewT) /
sizeof(
T)))
272 : -
static_cast<int>(
CeilLog2(
sizeof(
T) /
sizeof(NewT))));
277 template <
int kNewPow2,
size_t kNewMaxLanes>
283 template <
int kNewPow2,
size_t kNewMaxLanes>
289 static_assert(
HWY_MAX_N <= (
size_t{1} << 20),
"Change bit shift");
290 return static_cast<size_t>(
297 template <
int kNewPow2,
size_t kNewMaxLanes>
298 static constexpr size_t NewN() {
300 return WholeN<kNewPow2, kNewMaxLanes>() == 0
301 ? FracN<kNewPow2, kNewMaxLanes>()
302 : WholeN<kNewPow2, kNewMaxLanes>();
306 template <
typename NewT>
311 template <
typename NewT>
324template <
typename T,
size_t N,
int kPow2>
330template <
typename T,
size_t N,
int kPow2>
335template <
typename T,
int kPow2>
340template <
typename T,
size_t kLimit,
int kPow2>
342 static_assert(kLimit != 0,
"Does not make sense to have zero lanes");
350template <
typename T,
size_t kNumLanes>
352 static_assert(kNumLanes != 0,
"Does not make sense to have zero lanes");
353 static_assert(kNumLanes <=
HWY_LANES(T),
"Too many lanes");
366template <
typename T,
int kPow2 = 0>
378template <
typename T,
size_t kLimit,
int kPow2 = 0>
381#if !HWY_HAVE_SCALABLE
385template <
typename T,
size_t kLimit,
int kPow2 = 0>
389template <
typename T,
size_t kLimit,
int kPow2 = 0>
406template <
typename T,
size_t kNumLanes>
// Expands to the `kPrivateLanes` static member of the descriptor type D.
432#define HWY_MAX_LANES_D(D) D::kPrivateLanes
// Expands to the `kPrivatePow2` static member of the descriptor type D.
436#define HWY_POW2_D(D) D::kPrivatePow2
446#if !HWY_HAVE_SCALABLE
459template <
class T,
class D>
470template <
class T,
class D>
498class BlockDFromD_t {};
500template <
typename T,
size_t N,
int kPow2>
501class BlockDFromD_t<
Simd<T, N, kPow2>> {
503 static constexpr int kNewPow2 =
HWY_MIN(kPow2, 0);
505 static constexpr size_t kNewN = D::template NewN<kNewPow2, kMaxLpb>();
514using BlockDFromD =
typename detail::BlockDFromD_t<RemoveConst<D>>::type;
522template <
class D,
typename T>
524 const size_t N =
Lanes(
d);
525 return reinterpret_cast<uintptr_t
>(ptr) % (N *
sizeof(T)) == 0;
// Descriptor-keyed constraint macros. Each one forwards the lane type of D,
// spelled hwy::HWY_NAMESPACE::TFromD<D>, to the HWY_IF_* macro whose name is
// the same without the trailing _D. The type is fully qualified in every
// expansion, so no using-declaration for TFromD is needed at the use site.
531#define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(hwy::HWY_NAMESPACE::TFromD<D>)
532#define HWY_IF_NOT_UNSIGNED_D(D) \
533 HWY_IF_NOT_UNSIGNED(hwy::HWY_NAMESPACE::TFromD<D>)
534#define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(hwy::HWY_NAMESPACE::TFromD<D>)
535#define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(hwy::HWY_NAMESPACE::TFromD<D>)
536#define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(hwy::HWY_NAMESPACE::TFromD<D>)
537#define HWY_IF_FLOAT3264_D(D) HWY_IF_FLOAT3264(hwy::HWY_NAMESPACE::TFromD<D>)
538#define HWY_IF_NOT_FLOAT3264_D(D) \
539 HWY_IF_NOT_FLOAT3264(hwy::HWY_NAMESPACE::TFromD<D>)
540#define HWY_IF_SPECIAL_FLOAT_D(D) \
541 HWY_IF_SPECIAL_FLOAT(hwy::HWY_NAMESPACE::TFromD<D>)
542#define HWY_IF_NOT_SPECIAL_FLOAT_D(D) \
543 HWY_IF_NOT_SPECIAL_FLOAT(hwy::HWY_NAMESPACE::TFromD<D>)
544#define HWY_IF_FLOAT_OR_SPECIAL_D(D) \
545 HWY_IF_FLOAT_OR_SPECIAL(hwy::HWY_NAMESPACE::TFromD<D>)
546#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D) \
547 HWY_IF_NOT_FLOAT_NOR_SPECIAL(hwy::HWY_NAMESPACE::TFromD<D>)
// Lane-size variants: same forwarding of TFromD<D>, with the second argument
// (`bytes` or `bit_array`) passed through unchanged.
549#define HWY_IF_T_SIZE_D(D, bytes) \
550 HWY_IF_T_SIZE(hwy::HWY_NAMESPACE::TFromD<D>, bytes)
551#define HWY_IF_NOT_T_SIZE_D(D, bytes) \
552 HWY_IF_NOT_T_SIZE(hwy::HWY_NAMESPACE::TFromD<D>, bytes)
553#define HWY_IF_T_SIZE_ONE_OF_D(D, bit_array) \
554 HWY_IF_T_SIZE_ONE_OF(hwy::HWY_NAMESPACE::TFromD<D>, bit_array)
555#define HWY_IF_T_SIZE_LE_D(D, bytes) \
556 HWY_IF_T_SIZE_LE(hwy::HWY_NAMESPACE::TFromD<D>, bytes)
557#define HWY_IF_T_SIZE_GT_D(D, bytes) \
558 HWY_IF_T_SIZE_GT(hwy::HWY_NAMESPACE::TFromD<D>, bytes)
// Lane-count variants: these forward HWY_MAX_LANES_D(D) rather than the lane
// type, together with the caller-supplied `lanes` value.
560#define HWY_IF_LANES_D(D, lanes) HWY_IF_LANES(HWY_MAX_LANES_D(D), lanes)
561#define HWY_IF_LANES_LE_D(D, lanes) HWY_IF_LANES_LE(HWY_MAX_LANES_D(D), lanes)
562#define HWY_IF_LANES_GT_D(D, lanes) HWY_IF_LANES_GT(HWY_MAX_LANES_D(D), lanes)
563#define HWY_IF_LANES_PER_BLOCK_D(D, lanes) \
564 HWY_IF_LANES_PER_BLOCK(hwy::HWY_NAMESPACE::TFromD<D>, HWY_MAX_LANES_D(D), \
// Constraints on the pow2 value of descriptor D, expressed as a defaulted
// hwy::EnableIf<...>* template parameter.
// NOTE(review): each macro is defined twice in this span. Presumably the two
// pairs belong to opposite branches of a preprocessor conditional that is not
// visible here - confirm against the full header.
// First pair: reads the value via HWY_POW2_D(D).
568#define HWY_IF_POW2_LE_D(D, pow2) \
569 hwy::EnableIf<HWY_POW2_D(D) <= pow2>* = nullptr
// The comparison is parenthesized so that `>` is not parsed as the end of the
// template argument list.
570#define HWY_IF_POW2_GT_D(D, pow2) \
571 hwy::EnableIf<(HWY_POW2_D(D) > pow2)>* = nullptr
// Second pair: obtains the value by calling Pow2() on a default-constructed D.
573#define HWY_IF_POW2_LE_D(D, pow2) hwy::EnableIf<D().Pow2() <= pow2>* = nullptr
574#define HWY_IF_POW2_GT_D(D, pow2) hwy::EnableIf<(D().Pow2() > pow2)>* = nullptr
// Per-lane-type constraints keyed on a descriptor D: each forwards
// hwy::HWY_NAMESPACE::TFromD<D> to the HWY_IF_* macro of the same name without
// the _D suffix.
577#define HWY_IF_U8_D(D) HWY_IF_U8(hwy::HWY_NAMESPACE::TFromD<D>)
578#define HWY_IF_U16_D(D) HWY_IF_U16(hwy::HWY_NAMESPACE::TFromD<D>)
579#define HWY_IF_U32_D(D) HWY_IF_U32(hwy::HWY_NAMESPACE::TFromD<D>)
580#define HWY_IF_U64_D(D) HWY_IF_U64(hwy::HWY_NAMESPACE::TFromD<D>)
582#define HWY_IF_I8_D(D) HWY_IF_I8(hwy::HWY_NAMESPACE::TFromD<D>)
583#define HWY_IF_I16_D(D) HWY_IF_I16(hwy::HWY_NAMESPACE::TFromD<D>)
584#define HWY_IF_I32_D(D) HWY_IF_I32(hwy::HWY_NAMESPACE::TFromD<D>)
585#define HWY_IF_I64_D(D) HWY_IF_I64(hwy::HWY_NAMESPACE::TFromD<D>)
589#define HWY_IF_UI8_D(D) HWY_IF_UI8(hwy::HWY_NAMESPACE::TFromD<D>)
590#define HWY_IF_UI16_D(D) HWY_IF_UI16(hwy::HWY_NAMESPACE::TFromD<D>)
591#define HWY_IF_UI32_D(D) HWY_IF_UI32(hwy::HWY_NAMESPACE::TFromD<D>)
592#define HWY_IF_UI64_D(D) HWY_IF_UI64(hwy::HWY_NAMESPACE::TFromD<D>)
594#define HWY_IF_BF16_D(D) HWY_IF_BF16(hwy::HWY_NAMESPACE::TFromD<D>)
595#define HWY_IF_NOT_BF16_D(D) HWY_IF_NOT_BF16(hwy::HWY_NAMESPACE::TFromD<D>)
597#define HWY_IF_F16_D(D) HWY_IF_F16(hwy::HWY_NAMESPACE::TFromD<D>)
598#define HWY_IF_NOT_F16_D(D) HWY_IF_NOT_F16(hwy::HWY_NAMESPACE::TFromD<D>)
600#define HWY_IF_F32_D(D) HWY_IF_F32(hwy::HWY_NAMESPACE::TFromD<D>)
601#define HWY_IF_F64_D(D) HWY_IF_F64(hwy::HWY_NAMESPACE::TFromD<D>)
// HWY_V_SIZE_D is an expression, not a constraint: HWY_MAX_LANES_D(D)
// multiplied by sizeof the lane type of D.
603#define HWY_V_SIZE_D(D) \
604 (HWY_MAX_LANES_D(D) * sizeof(hwy::HWY_NAMESPACE::TFromD<D>))
// Vector-size constraints: forward the lane type, HWY_MAX_LANES_D(D) and the
// caller-supplied `bytes` to the corresponding HWY_IF_V_SIZE* macro.
605#define HWY_IF_V_SIZE_D(D, bytes) \
606 HWY_IF_V_SIZE(hwy::HWY_NAMESPACE::TFromD<D>, HWY_MAX_LANES_D(D), bytes)
607#define HWY_IF_V_SIZE_LE_D(D, bytes) \
608 HWY_IF_V_SIZE_LE(hwy::HWY_NAMESPACE::TFromD<D>, HWY_MAX_LANES_D(D), bytes)
609#define HWY_IF_V_SIZE_GT_D(D, bytes) \
610 HWY_IF_V_SIZE_GT(hwy::HWY_NAMESPACE::TFromD<D>, HWY_MAX_LANES_D(D), bytes)
// Vector-keyed counterparts of the _D constraints: each forwards the lane type
// of vector type V, spelled hwy::HWY_NAMESPACE::TFromV<V>, to the HWY_IF_*
// macro of the same name without the _V suffix.
613#define HWY_IF_UNSIGNED_V(V) HWY_IF_UNSIGNED(hwy::HWY_NAMESPACE::TFromV<V>)
614#define HWY_IF_NOT_UNSIGNED_V(V) \
615 HWY_IF_NOT_UNSIGNED(hwy::HWY_NAMESPACE::TFromV<V>)
616#define HWY_IF_SIGNED_V(V) HWY_IF_SIGNED(hwy::HWY_NAMESPACE::TFromV<V>)
617#define HWY_IF_FLOAT_V(V) HWY_IF_FLOAT(hwy::HWY_NAMESPACE::TFromV<V>)
618#define HWY_IF_NOT_FLOAT_V(V) HWY_IF_NOT_FLOAT(hwy::HWY_NAMESPACE::TFromV<V>)
619#define HWY_IF_SPECIAL_FLOAT_V(V) \
620 HWY_IF_SPECIAL_FLOAT(hwy::HWY_NAMESPACE::TFromV<V>)
621#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V) \
622 HWY_IF_NOT_FLOAT_NOR_SPECIAL(hwy::HWY_NAMESPACE::TFromV<V>)
// Lane-size variants; the second argument is passed through unchanged.
624#define HWY_IF_T_SIZE_V(V, bytes) \
625 HWY_IF_T_SIZE(hwy::HWY_NAMESPACE::TFromV<V>, bytes)
626#define HWY_IF_NOT_T_SIZE_V(V, bytes) \
627 HWY_IF_NOT_T_SIZE(hwy::HWY_NAMESPACE::TFromV<V>, bytes)
628#define HWY_IF_T_SIZE_ONE_OF_V(V, bit_array) \
629 HWY_IF_T_SIZE_ONE_OF(hwy::HWY_NAMESPACE::TFromV<V>, bit_array)
// Applies HWY_MAX_LANES_D to DFromV<V>.
// NOTE(review): DFromV is written unqualified here, unlike TFromV in the
// surrounding macros, so it must be visible by name wherever this expands.
631#define HWY_MAX_LANES_V(V) HWY_MAX_LANES_D(DFromV<V>)
// Vector-size constraints: forward the lane type, HWY_MAX_LANES_V(V) and the
// caller-supplied `bytes` to the corresponding HWY_IF_V_SIZE* macro.
632#define HWY_IF_V_SIZE_V(V, bytes) \
633 HWY_IF_V_SIZE(hwy::HWY_NAMESPACE::TFromV<V>, HWY_MAX_LANES_V(V), bytes)
634#define HWY_IF_V_SIZE_LE_V(V, bytes) \
635 HWY_IF_V_SIZE_LE(hwy::HWY_NAMESPACE::TFromV<V>, HWY_MAX_LANES_V(V), bytes)
636#define HWY_IF_V_SIZE_GT_V(V, bytes) \
637 HWY_IF_V_SIZE_GT(hwy::HWY_NAMESPACE::TFromV<V>, HWY_MAX_LANES_V(V), bytes)
// Each macro below is #undef'd immediately before being defined, so any
// definition already in effect at this point is replaced.
// HWY_IF_REDUCE_D: the condition given to hwy::EnableIf is false when
// HWY_MAX_LANES_D(D) is 1, and also false when HWY_MAX_LANES_D(D) is 4 and the
// lane type is one byte wide; it is true in every other case.
641#undef HWY_IF_REDUCE_D
642#define HWY_IF_REDUCE_D(D) \
643 hwy::EnableIf<HWY_MAX_LANES_D(D) != 1 && \
644 (HWY_MAX_LANES_D(D) != 4 || \
645 sizeof(hwy::HWY_NAMESPACE::TFromD<D>) != 1)>* = nullptr
// The next four all expand to HWY_IF_LANES_GT_D(..., 1); the _V forms apply it
// to DFromV<V>.
647#undef HWY_IF_SUM_OF_LANES_D
648#define HWY_IF_SUM_OF_LANES_D(D) HWY_IF_LANES_GT_D(D, 1)
650#undef HWY_IF_MINMAX_OF_LANES_D
651#define HWY_IF_MINMAX_OF_LANES_D(D) HWY_IF_LANES_GT_D(D, 1)
653#undef HWY_IF_ADDSUB_V
654#define HWY_IF_ADDSUB_V(V) HWY_IF_LANES_GT_D(DFromV<V>, 1)
656#undef HWY_IF_MULADDSUB_V
657#define HWY_IF_MULADDSUB_V(V) HWY_IF_LANES_GT_D(DFromV<V>, 1)
// Expands to a defaulted `void*` parameter that does not mention V, so this
// definition places no condition on V.
664#undef HWY_IF_U2I_DEMOTE_FROM_LANE_SIZE_V
665#define HWY_IF_U2I_DEMOTE_FROM_LANE_SIZE_V(V) void* = nullptr
// Aliases that map the LANE_SIZE spellings onto the T_SIZE macros.
// NOTE(review): presumably kept for backward compatibility with older callers
// - confirm.
668#define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_T_SIZE_D(D, bytes)
669#define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE_D(D, bytes)
#define HWY_MAX(a, b)
Definition base.h:177
#define HWY_RESTRICT
Definition base.h:95
#define HWY_API
Definition base.h:171
#define HWY_MIN(a, b)
Definition base.h:176
#define HWY_INLINE
Definition base.h:101
#define HWY_MAYBE_UNUSED
Definition base.h:113
HWY_INLINE void MaybeUnpoison(T *HWY_RESTRICT unaligned, size_t count)
Definition ops/shared-inl.h:151
HWY_INLINE T * NativeLanePointer(T *p)
Definition ops/shared-inl.h:111
typename NativeLaneTypeT< T >::type NativeLaneType
Definition ops/shared-inl.h:99
HWY_INLINE If< IsConst< T >(), const uint16_t *, uint16_t * > U16LanePointer(T *p)
Definition ops/shared-inl.h:139
constexpr size_t ScaleByPower(size_t N, int pow2)
Definition ops/shared-inl.h:146
constexpr bool IsFull(Simd< T, N, kPow2 >)
Definition ops/shared-inl.h:325
typename D::template Rebind< T > Rebind
Definition ops/shared-inl.h:460
Simd< TFromD< D >, HWY_MIN(16/sizeof(TFromD< D >), HWY_MAX_LANES_D(D)), 0 > BlockDFromD
Definition ops/shared-inl.h:517
D d
Definition arm_sve-inl.h:1915
RepartitionToWide< RepartitionToWideX2< D > > RepartitionToWideX3
Definition ops/shared-inl.h:483
V VecArg
Definition ops/shared-inl.h:69
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(D)
Definition ops/shared-inl.h:442
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition ops/shared-inl.h:474
D TFromD< D > *HWY_RESTRICT p
Definition arm_sve-inl.h:1915
typename D::T TFromD
Definition ops/shared-inl.h:426
HWY_API bool IsAligned(D d, T *ptr)
Definition ops/shared-inl.h:523
typename detail::CappedTagChecker< T, kLimit, kPow2 >::type CappedTag
Definition ops/shared-inl.h:379
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition ops/shared-inl.h:465
Repartition< MakeNarrow< TFromD< D > >, D > RepartitionToNarrow
Definition ops/shared-inl.h:476
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition ops/shared-inl.h:463
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition ops/shared-inl.h:367
RepartitionToWide< RepartitionToWide< D > > RepartitionToWideX2
Definition ops/shared-inl.h:480
typename D::Half Half
Definition ops/shared-inl.h:487
typename detail::FixedTagChecker< T, kNumLanes >::type FixedTag
Definition ops/shared-inl.h:407
CappedTag< T, kLimit, kPow2 > CappedTagIfFixed
Definition ops/shared-inl.h:386
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
Rebind< MakeFloat< TFromD< D > >, D > RebindToFloat
Definition ops/shared-inl.h:467
typename D::Twice Twice
Definition ops/shared-inl.h:491
typename D::template Repartition< T > Repartition
Definition ops/shared-inl.h:471
constexpr size_t FloorLog2(TI x)
Definition base.h:2662
typename IfT< Condition, Then, Else >::type If
Definition base.h:520
constexpr size_t CeilLog2(TI x)
Definition base.h:2669
#define HWY_MAX_LANES_D(D)
Definition ops/shared-inl.h:432
#define HWY_LANES(T)
Definition set_macros-inl.h:169
#define HWY_MAX_N
Definition set_macros-inl.h:61
#define HWY_MIN_POW2
Definition set_macros-inl.h:78
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
#define HWY_MAX_POW2
Definition set_macros-inl.h:72
Definition ops/shared-inl.h:198
static constexpr size_t WholeN()
Definition ops/shared-inl.h:278
static constexpr size_t kPrivateLanes
Definition ops/shared-inl.h:243
static constexpr size_t NewN()
Definition ops/shared-inl.h:298
static constexpr int kFrac
Definition ops/shared-inl.h:215
static constexpr size_t FracN()
Definition ops/shared-inl.h:284
constexpr size_t MaxBytes() const
Definition ops/shared-inl.h:250
constexpr size_t MaxLanes() const
Definition ops/shared-inl.h:249
constexpr int Pow2() const
Definition ops/shared-inl.h:253
static constexpr int RebindPow2()
Definition ops/shared-inl.h:268
static constexpr int kPrivatePow2
Definition ops/shared-inl.h:247
constexpr size_t MaxBlocks() const
Definition ops/shared-inl.h:251
static constexpr size_t kWhole
Definition ops/shared-inl.h:213
Lane T
Definition ops/shared-inl.h:200
static constexpr size_t RepartitionLanes()
Definition ops/shared-inl.h:261
Definition ops/shared-inl.h:341
static constexpr size_t N
Definition ops/shared-inl.h:346
typename ClampNAndPow2< T, N, kPow2 >::type type
Definition ops/shared-inl.h:347
static constexpr size_t kLimitPow2
Definition ops/shared-inl.h:345
Definition ops/shared-inl.h:331
Definition ops/shared-inl.h:351
uint16_t type
Definition ops/shared-inl.h:91
uint16_t type
Definition ops/shared-inl.h:83
Definition ops/shared-inl.h:75
T type
Definition ops/shared-inl.h:76
Definition ops/shared-inl.h:336
typename ClampNAndPow2< T, HWY_LANES(T), kPow2 >::type type
Definition ops/shared-inl.h:337
uint16_t bits
Definition base.h:1606