17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
37#if VQSORT_ENABLED || HWY_IDE
// Compile-time flag: always returns true.
43 static constexpr bool Is128() {
return true; }
// Returns the number of lanes per key, which is the constant 2 here.
44 constexpr size_t LanesPerKey()
const {
return 2; }
50 HWY_INLINE void Swap(LaneType* a, LaneType* b)
const {
51 const FixedTag<LaneType, 2>
d;
52 const auto temp =
LoadU(d, a);
57 template <
class V,
class M>
58 HWY_INLINE V CompressKeys(V keys, M mask)
const {
63 HWY_INLINE Vec<D> SetKey(D d,
const TFromD<D>* key)
const {
68 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v)
const {
73 HWY_INLINE Vec<D> ReverseKeys2(D ,
const Vec<D> v)
const {
79 HWY_INLINE Vec<D> ReverseKeys4(D d,
const Vec<D> v)
const {
81 return ReverseKeys(d, v);
86 HWY_INLINE Vec<D> OddEvenPairs(D d,
const Vec<D> odd,
87 const Vec<D> even)
const {
93 HWY_INLINE V OddEvenKeys(
const V odd,
const V even)
const {
98 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>)
const {
103 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>)
const {
109 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>)
const {
115 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>)
const {
121 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>)
const {
127struct Key128 :
public KeyAny128 {
// Always returns false for Key128 (KeyValue128::IsKV returns true instead).
129 static constexpr bool IsKV() {
return false; }
// Returns the string literal "U128" as the name of this key type.
134 const char* KeyString()
const {
return "U128"; }
// Returns the mask computed by Eq128(d, a, b), i.e. the comparison is
// delegated entirely to the 128-bit equality op.
137 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b)
const {
138 return Eq128(d, a, b);
// Returns the mask computed by Ne128(d, a, b); counterpart of EqualKeys.
142 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b)
const {
143 return Ne128(d, a, b);
148 HWY_INLINE bool NoKeyDifference(D , Vec<D> diff)
const {
150 const RebindToUnsigned<D> du;
// Scalar equality of a single key stored as two lanes: true only if both
// a[0] == b[0] and a[1] == b[1]. Reads exactly two elements from each pointer.
154 HWY_INLINE bool Equal1(
const LaneType* a,
const LaneType* b)
const {
155 return a[0] == b[0] && a[1] == b[1];
160 template <
class Order,
class D>
162 const Mask<D> eqHL =
Eq(a, b);
163 const Vec<D> ltHL =
VecFromMask(d, Order().CompareLanes(a, b));
164#if HWY_TARGET <= HWY_AVX2
165 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
180struct OrderAscending128 :
public Key128 {
181 using Order = SortAscending;
182 using OrderForSortingNetwork = OrderAscending128;
// Scalar "a < b" for a single two-lane key. Lane 1 is compared first; only
// when a[1] == b[1] does lane 0 break the tie.
184 HWY_INLINE bool Compare1(
const LaneType* a,
const LaneType* b)
const {
185 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
// Vector counterpart of Compare1: returns the mask from Lt128(d, a, b).
189 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b)
const {
190 return Lt128(d, a, b);
195 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
200 HWY_INLINE Vec<D> First(D d,
const Vec<D> a,
const Vec<D> b)
const {
205 HWY_INLINE Vec<D> Last(D d,
const Vec<D> a,
const Vec<D> b)
const {
// Returns v - k1 - adjust, where adjust is a one-lane-shifted copy of k1 taken
// only from lanes of v that equal zero.
// NOTE(review): this looks like a 128-bit decrement with borrow from the lower
// into the upper 64-bit lane of each key - confirm against the lane order of
// OddEven and ShiftLeftLanes.
221 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
222 const Vec<D> k0 =
Zero(d);
// k1 mixes lanes of k0 (zero) with lanes holding uint64_t{1}; presumably 1 in
// the lower lane of each key and 0 in the upper one - TODO confirm.
223 const Vec<D> k1 =
OddEven(k0,
Set(d, uint64_t{1}));
// Lanes of v that are zero; subtracting from these is what requires the
// extra adjustment below.
224 const Mask<D> borrow =
Eq(v, k0);
// Keep k1 only where borrow is set, then move it up by one lane.
226 const Vec<D> adjust = ShiftLeftLanes<1>(
IfThenElseZero(borrow, k1));
227 return Sub(
Sub(v, k1), adjust);
231struct OrderDescending128 :
public Key128 {
232 using Order = SortDescending;
233 using OrderForSortingNetwork = OrderDescending128;
// Scalar "b < a" for a single two-lane key, i.e. the ascending comparison with
// operands swapped. Lane 1 is compared first; lane 0 only breaks ties.
235 HWY_INLINE bool Compare1(
const LaneType* a,
const LaneType* b)
const {
236 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
// Vector counterpart of Compare1: returns the mask from Lt128(d, b, a). Note
// the swapped operand order relative to OrderAscending128::Compare.
240 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b)
const {
241 return Lt128(d, b, a);
246 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
251 HWY_INLINE Vec<D> First(D d,
const Vec<D> a,
const Vec<D> b)
const {
256 HWY_INLINE Vec<D> Last(D d,
const Vec<D> a,
const Vec<D> b)
const {
272 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
274 const Vec<D> added =
Add(v, k1);
275 const Mask<D> overflowed =
Lt(added, v);
277 const Vec<D> adjust = ShiftLeftLanes<1>(
IfThenElseZero(overflowed, k1));
278 return Add(added, adjust);
283struct KeyValue128 :
public KeyAny128 {
// Always returns true for KeyValue128 (Key128::IsKV returns false instead).
286 static constexpr bool IsKV() {
return true; }
289 using KeyType = K64V64;
// Returns the string literal "k+v=128" as the name of this key type.
291 const char* KeyString()
const {
return "k+v=128"; }
294 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b)
const {
299 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b)
const {
305 HWY_INLINE bool NoKeyDifference(D , Vec<D> diff)
const {
307 const RebindToUnsigned<D> du;
308 const Vec<
decltype(du)> zero =
Zero(du);
309 const Vec<
decltype(du)> keys =
OddEven(diff, zero);
313 HWY_INLINE bool Equal1(
const LaneType* a,
const LaneType* b)
const {
319 template <
class Order,
class D>
327struct OrderAscendingKV128 :
public KeyValue128 {
328 using Order = SortAscending;
329 using OrderForSortingNetwork = OrderAscending128;
331 HWY_INLINE bool Compare1(
const LaneType* a,
const LaneType* b)
const {
336 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b)
const {
342 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
347 HWY_INLINE Vec<D> First(D d,
const Vec<D> a,
const Vec<D> b)
const {
352 HWY_INLINE Vec<D> Last(D d,
const Vec<D> a,
const Vec<D> b)
const {
368 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
374struct OrderDescendingKV128 :
public KeyValue128 {
375 using Order = SortDescending;
376 using OrderForSortingNetwork = OrderDescending128;
378 HWY_INLINE bool Compare1(
const LaneType* a,
const LaneType* b)
const {
383 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b)
const {
389 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
394 HWY_INLINE Vec<D> First(D d,
const Vec<D> a,
const Vec<D> b)
const {
399 HWY_INLINE Vec<D> Last(D d,
const Vec<D> a,
const Vec<D> b)
const {
415 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v)
const {
425#if HWY_TARGET <= HWY_AVX3
426template <
class V, HWY_IF_V_SIZE_V(V, 64)>
428 return V{_mm512_permutex_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
432#if HWY_TARGET <= HWY_AVX2
434template <
class V, HWY_IF_V_SIZE_V(V, 32)>
436 return V{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
443#if HWY_TARGET == HWY_SVE_256
444 return svdup_lane_u64(v, 3);
446 HWY_ALIGN static constexpr uint64_t kIndices[8] = {3, 3, 3, 3, 7, 7, 7, 7};
447 const ScalableTag<uint64_t>
d;
456struct Traits128 :
public Base {
457 using TraitsForSortingNetwork =
458 Traits128<typename Base::OrderForSortingNetwork>;
463 const Base* base =
static_cast<const Base*
>(
this);
464 const size_t N =
Lanes(d);
466 v = base->SetKey(d, buf + 0);
467 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
468 v = base->First(d, v, base->SetKey(d, buf + i));
476 const Base* base =
static_cast<const Base*
>(
this);
477 const size_t N =
Lanes(d);
479 v = base->SetKey(d, buf + 0);
480 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
481 v = base->Last(d, v, base->SetKey(d, buf + i));
487 HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b)
const {
488 const Base* base =
static_cast<const Base*
>(
this);
490 const Vec<D> a_copy = a;
491 const auto lt = base->Compare(d, a, b);
498 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v)
const {
499 const Base* base =
static_cast<const Base*
>(
this);
500 Vec<D> swapped = base->ReverseKeys2(d, v);
501 const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
508 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v)
const {
509 const Base* base =
static_cast<const Base*
>(
this);
510 Vec<D> swapped = base->ReverseKeys4(d, v);
512 const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
515 HWY_ALIGN uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
522 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>)
const {
#define HWY_RESTRICT
Definition base.h:95
#define HWY_INLINE
Definition base.h:101
#define HWY_DASSERT(condition)
Definition base.h:290
#define HWY_MAYBE_UNUSED
Definition base.h:113
#define HWY_ASSERT(condition)
Definition base.h:237
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:560
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:570
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:7156
HWY_API VFromD< D > VecFromMask(D d, const MFromD< D > m)
Definition arm_neon-inl.h:2960
HWY_INLINE VFromD< D > Max128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9480
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7339
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7331
D d
Definition arm_sve-inl.h:1915
HWY_INLINE VFromD< D > Max128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9490
HWY_API V IfThenElse(MFromD< DFromV< V > > mask, V yes, V no)
Definition arm_neon-inl.h:2992
HWY_API VFromD< D > BitCast(D d, Vec128< FromT, Repartition< FromT, D >().MaxLanes()> v)
Definition arm_neon-inl.h:1581
HWY_API void Store(VFromD< D > v, D d, TFromD< D > *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:3911
HWY_API Vec128< uint8_t > LoadU(D, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3442
HWY_INLINE MFromD< D > Ne128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9466
HWY_INLINE MFromD< D > Lt128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9436
HWY_API VFromD< D > Zero(D d)
Definition arm_neon-inl.h:947
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition arm_neon-inl.h:8924
HWY_API void StoreU(Vec128< uint8_t > v, D, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3689
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2785
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition arm_neon-inl.h:7074
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition arm_neon-inl.h:5775
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:7162
HWY_API VFromD< D > ConcatUpperLower(D d, VFromD< D > hi, VFromD< D > lo)
Definition arm_neon-inl.h:6989
HWY_INLINE MFromD< D > Eq128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9444
HWY_API VFromD< D > ReverseBlocks(D, VFromD< D > v)
Definition arm_neon-inl.h:7169
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition arm_neon-inl.h:2779
HWY_API VFromD< D > LoadDup128(D d, const TFromD< D > *HWY_RESTRICT p)
Definition arm_neon-inl.h:3682
HWY_API bool AllTrue(D d, Mask128< T > m)
Definition arm_neon-inl.h:8416
HWY_API Indices128< TFromD< D >, MaxLanes(D())> SetTableIndices(D d, const TI *idx)
Definition arm_neon-inl.h:5768
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
HWY_INLINE MFromD< D > Lt128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9409
HWY_INLINE VFromD< D > Min128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9475
HWY_API Vec128< T, N > IfThenElseZero(Mask128< T, N > mask, Vec128< T, N > yes)
Definition arm_neon-inl.h:3007
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:7107
HWY_INLINE MFromD< D > Eq128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9451
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
HWY_INLINE MFromD< D > Ne128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9459
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:39
HWY_INLINE VFromD< D > Min128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9485
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T LowestValue()
Definition base.h:2191
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T HighestValue()
Definition base.h:2212
#define HWY_ALIGN
Definition set_macros-inl.h:167
#define HWY_NAMESPACE
Definition set_macros-inl.h:166