Grok 12.0.1
traits-inl.h
Go to the documentation of this file.
1// Copyright 2021 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Per-target
17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
21#else
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
23#endif
24
25#include <stddef.h>
26#include <stdint.h>
27
28#include "hwy/contrib/sort/order.h" // SortDescending
29#include "hwy/contrib/sort/shared-inl.h" // SortConstants
30#include "hwy/highway.h"
31
33namespace hwy {
34namespace HWY_NAMESPACE {
35namespace detail {
36
37// Base class of both KeyLane (with or without VQSORT_ENABLED)
38template <typename LaneTypeArg, typename KeyTypeArg>
40 static constexpr bool Is128() { return false; }
41 constexpr size_t LanesPerKey() const { return 1; }
42
43 // What type bench_sort should allocate for generating inputs.
44 using LaneType = LaneTypeArg;
45 // What type to pass to VQSort.
46 using KeyType = KeyTypeArg;
47
48 const char* KeyString() const {
49 return IsSame<KeyTypeArg, float16_t>() ? "f16"
50 : IsSame<KeyTypeArg, float>() ? "f32"
51 : IsSame<KeyTypeArg, double>() ? "f64"
52 : IsSame<KeyTypeArg, int16_t>() ? "i16"
53 : IsSame<KeyTypeArg, int32_t>() ? "i32"
54 : IsSame<KeyTypeArg, int64_t>() ? "i64"
55 : IsSame<KeyTypeArg, uint16_t>() ? "u32"
56 : IsSame<KeyTypeArg, uint32_t>() ? "u32"
57 : IsSame<KeyTypeArg, uint64_t>() ? "u64"
58 : IsSame<KeyTypeArg, hwy::K32V32>() ? "k+v=64"
59 : "?";
60 }
61};
62
63// Wrapper functions so we can specialize for floats - infinity trumps
64// HighestValue (the normal value with the largest magnitude). Must be outside
65// Order* classes to enable SFINAE. LargestSortValue is used even if
66// !VQSORT_ENABLED.
67
68template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
70 return Inf(d);
71}
72template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
74 return Set(d, hwy::HighestValue<TFromD<D>>());
75}
76
77template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
79 return Neg(Inf(d));
80}
81template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
83 return Set(d, hwy::LowestValue<TFromD<D>>());
84}
85
86// Returns the next distinct larger value unless already +inf.
87template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
89 HWY_DASSERT(AllFalse(d, IsNaN(v))); // we replaced all NaN with LastValue.
90 using T = TFromD<decltype(d)>;
91 const RebindToUnsigned<D> du;
92 using VU = Vec<decltype(du)>;
93 using TU = TFromD<decltype(du)>;
94
95 const VU vu = BitCast(du, Abs(v));
96
97 // The direction depends on the original sign. Integer comparison is cheaper
98 // than float comparison and treats -0 as 0 (so we return +epsilon).
99 const Mask<decltype(du)> was_pos = Le(BitCast(du, v), SignBit(du));
100 // If positive, add 1, else -1.
101 const VU add = IfThenElse(was_pos, Set(du, 1u), Set(du, LimitsMax<TU>()));
102 // Prev/next integer is the prev/next value, even if mantissa under/overflows.
103 v = BitCast(d, Add(vu, add));
104 // But we may have overflowed into inf or NaN; replace with inf if positive,
105 // but the largest (later negated!) value if the input was -inf.
106 const Mask<D> was_pos_f = RebindMask(d, was_pos);
107 v = IfThenElse(IsFinite(v), v,
108 IfThenElse(was_pos_f, Inf(d), Set(d, HighestValue<T>())));
109 // Restore the original sign - not via CopySignToAbs because we used a mask.
110 return IfThenElse(was_pos_f, v, Neg(v));
111}
112
113// Returns the next distinct smaller value unless already -inf.
114template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
116 HWY_DASSERT(AllFalse(d, IsNaN(v))); // we replaced all NaN with LastValue.
117 using T = TFromD<decltype(d)>;
118 const RebindToUnsigned<D> du;
119 using VU = Vec<decltype(du)>;
120 using TU = TFromD<decltype(du)>;
121
122 const VU vu = BitCast(du, Abs(v));
123
124 // The direction depends on the original sign. Float comparison because we
125 // want to treat 0 as -0 so we return -epsilon.
126 const Mask<D> was_pos = Gt(v, Zero(d));
127 // If positive, add -1, else 1.
128 const VU add =
129 IfThenElse(RebindMask(du, was_pos), Set(du, LimitsMax<TU>()), Set(du, 1));
130 // Prev/next integer is the prev/next value, even if mantissa under/overflows.
131 v = BitCast(d, Add(vu, add));
132 // But we may have overflowed into inf or NaN; replace with +inf (which will
133 // later be negated) if negative, but the largest value if the input was +inf.
134 v = IfThenElse(IsFinite(v), v,
135 IfThenElse(was_pos, Set(d, HighestValue<T>()), Inf(d)));
136 // Restore the original sign - not via CopySignToAbs because we used a mask.
137 return IfThenElse(was_pos, v, Neg(v));
138}
139
140template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
142 return Add(v, Set(d, TFromD<D>{1}));
143}
144
145template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
146Vec<D> SmallerSortValue(D d, Vec<D> v) {
147 return Sub(v, Set(d, TFromD<D>{1}));
148}
149
150#if VQSORT_ENABLED || HWY_IDE
151
152// Highway does not provide a lane type for 128-bit keys, so we use uint64_t
153// along with an abstraction layer for single-lane vs. lane-pair, which is
154// independent of the order.
155template <typename LaneType, typename KeyType>
156struct KeyLane : public KeyLaneBase<LaneType, KeyType> {
157 // For HeapSort
158 HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
159 const LaneType temp = *a;
160 *a = *b;
161 *b = temp;
162 }
163
164 template <class V, class M>
165 HWY_INLINE V CompressKeys(V keys, M mask) const {
166 return CompressNot(keys, mask);
167 }
168
169 // Broadcasts one key into a vector
170 template <class D>
171 HWY_INLINE Vec<D> SetKey(D d, const LaneType* key) const {
172 return Set(d, *key);
173 }
174
175 template <class D>
176 HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
177 return Eq(a, b);
178 }
179
180 template <class D>
181 HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
182 return Ne(a, b);
183 }
184
185 // For keys=lanes, any difference counts.
186 template <class D>
187 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
188 // Must avoid floating-point comparisons (for -0)
189 const RebindToUnsigned<D> du;
190 return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
191 }
192
193 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
194 return *a == *b;
195 }
196
197 template <class D>
198 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
199 return Reverse(d, v);
200 }
201
202 template <class D>
203 HWY_INLINE Vec<D> ReverseKeys2(D d, Vec<D> v) const {
204 return Reverse2(d, v);
205 }
206
207 template <class D>
208 HWY_INLINE Vec<D> ReverseKeys4(D d, Vec<D> v) const {
209 return Reverse4(d, v);
210 }
211
212 template <class D>
213 HWY_INLINE Vec<D> ReverseKeys8(D d, Vec<D> v) const {
214 return Reverse8(d, v);
215 }
216
217 template <class D>
218 HWY_INLINE Vec<D> ReverseKeys16(D d, Vec<D> v) const {
219 static_assert(SortConstants::kMaxCols <= 16, "Assumes u32x16 = 512 bit");
220 return ReverseKeys(d, v);
221 }
222
223 template <class V>
224 HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
225 return OddEven(odd, even);
226 }
227
228 template <class D, HWY_IF_T_SIZE_D(D, 2)>
229 HWY_INLINE Vec<D> SwapAdjacentPairs(D d, const Vec<D> v) const {
230 const Repartition<uint32_t, D> du32;
231 return BitCast(d, Shuffle2301(BitCast(du32, v)));
232 }
233 template <class D, HWY_IF_T_SIZE_D(D, 4)>
234 HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
235 return Shuffle1032(v);
236 }
237 template <class D, HWY_IF_T_SIZE_D(D, 8)>
238 HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
239 return SwapAdjacentBlocks(v);
240 }
241
242 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
243 HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
244#if HWY_HAVE_FLOAT64 // in case D is float32
245 const RepartitionToWide<D> dw;
246#else
247 const RepartitionToWide<RebindToUnsigned<D>> dw;
248#endif
249 return BitCast(d, SwapAdjacentPairs(dw, BitCast(dw, v)));
250 }
251 template <class D, HWY_IF_T_SIZE_D(D, 8)>
252 HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
253 // Assumes max vector size = 512
254 return ConcatLowerUpper(d, v, v);
255 }
256
257 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
258 HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
259 const Vec<D> even) const {
260#if HWY_HAVE_FLOAT64 // in case D is float32
261 const RepartitionToWide<D> dw;
262#else
263 const RepartitionToWide<RebindToUnsigned<D>> dw;
264#endif
265 return BitCast(d, OddEven(BitCast(dw, odd), BitCast(dw, even)));
266 }
267 template <class D, HWY_IF_T_SIZE_D(D, 8)>
268 HWY_INLINE Vec<D> OddEvenPairs(D /* tag */, Vec<D> odd, Vec<D> even) const {
269 return OddEvenBlocks(odd, even);
270 }
271
272 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
273 HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
274#if HWY_HAVE_FLOAT64 // in case D is float32
275 const RepartitionToWide<D> dw;
276#else
277 const RepartitionToWide<RebindToUnsigned<D>> dw;
278#endif
279 return BitCast(d, OddEvenPairs(dw, BitCast(dw, odd), BitCast(dw, even)));
280 }
281 template <class D, HWY_IF_T_SIZE_D(D, 8)>
282 HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
283 return ConcatUpperLower(d, odd, even);
284 }
285};
286
287// Anything order-related depends on the key traits *and* the order (see
288// FirstOfLanes). We cannot implement just one Compare function because Lt128
289// only compiles if the lane type is u64. Thus we need either overloaded
290// functions with a tag type, class specializations, or separate classes.
291// We avoid overloaded functions because we want all functions to be callable
292// from a SortTraits without per-function wrappers. Specializing would work, but
293// we are anyway going to specialize at a higher level.
294template <typename T>
295struct OrderAscending : public KeyLane<T, T> {
296 // False indicates the entire key (i.e. lane) should be compared. KV stands
297 // for key-value.
298 static constexpr bool IsKV() { return false; }
299
300 using Order = SortAscending;
301 using OrderForSortingNetwork = OrderAscending<T>;
302
303 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *a < *b; }
304
305 template <class D>
306 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
307 return Lt(a, b);
308 }
309
310 // Two halves of Sort2, used in ScanMinMax.
311 template <class D>
312 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
313 return Min(a, b);
314 }
315
316 template <class D>
317 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
318 return Max(a, b);
319 }
320
321 template <class D>
322 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
323 T* HWY_RESTRICT /* buf */) const {
324 return MinOfLanes(d, v);
325 }
326
327 template <class D>
328 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
329 T* HWY_RESTRICT /* buf */) const {
330 return MaxOfLanes(d, v);
331 }
332
333 template <class D>
334 HWY_INLINE Vec<D> FirstValue(D d) const {
335 return SmallestSortValue(d);
336 }
337
338 template <class D>
339 HWY_INLINE Vec<D> LastValue(D d) const {
340 return LargestSortValue(d);
341 }
342
343 template <class D>
344 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
345 return SmallerSortValue(d, v);
346 }
347};
348
349template <typename T>
350struct OrderDescending : public KeyLane<T, T> {
351 // False indicates the entire key (i.e. lane) should be compared. KV stands
352 // for key-value.
353 static constexpr bool IsKV() { return false; }
354
355 using Order = SortDescending;
356 using OrderForSortingNetwork = OrderDescending<T>;
357
358 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *b < *a; }
359
360 template <class D>
361 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
362 return Lt(b, a);
363 }
364
365 template <class D>
366 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
367 return Max(a, b);
368 }
369
370 template <class D>
371 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
372 return Min(a, b);
373 }
374
375 template <class D>
376 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
377 T* HWY_RESTRICT /* buf */) const {
378 return MaxOfLanes(d, v);
379 }
380
381 template <class D>
382 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
383 T* HWY_RESTRICT /* buf */) const {
384 return MinOfLanes(d, v);
385 }
386
387 template <class D>
388 HWY_INLINE Vec<D> FirstValue(D d) const {
389 return LargestSortValue(d);
390 }
391
392 template <class D>
393 HWY_INLINE Vec<D> LastValue(D d) const {
394 return SmallestSortValue(d);
395 }
396
397 template <class D>
398 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
399 return LargerSortValue(d, v);
400 }
401};
402
403struct KeyValue64 : public KeyLane<uint64_t, hwy::K32V32> {
404 // True indicates only part of the key (i.e. lane) should be compared. KV
405 // stands for key-value.
406 static constexpr bool IsKV() { return true; }
407
408 template <class D>
409 HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
410 return Eq(ShiftRight<32>(a), ShiftRight<32>(b));
411 }
412
413 template <class D>
414 HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
415 return Ne(ShiftRight<32>(a), ShiftRight<32>(b));
416 }
417
418 HWY_INLINE bool Equal1(const uint64_t* a, const uint64_t* b) const {
419 return (*a >> 32) == (*b >> 32);
420 }
421
422 // Only count differences in the actual key, not the value.
423 template <class D>
424 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
425 // Must avoid floating-point comparisons (for -0)
426 const RebindToUnsigned<D> du;
427 const Vec<decltype(du)> zero = Zero(du);
428 const Vec<decltype(du)> keys = ShiftRight<32>(diff); // clear values
429 return AllTrue(du, Eq(BitCast(du, keys), zero));
430 }
431};
432
433struct OrderAscendingKV64 : public KeyValue64 {
434 using Order = SortAscending;
435 using OrderForSortingNetwork = OrderAscending<LaneType>;
436
437 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
438 return (*a >> 32) < (*b >> 32);
439 }
440
441 template <class D>
442 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
443 return Lt(ShiftRight<32>(a), ShiftRight<32>(b));
444 }
445
446 // Not required to be stable (preserving the order of equivalent keys), so
447 // we can include the value in the comparison.
448 template <class D>
449 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
450 return Min(a, b);
451 }
452
453 template <class D>
454 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
455 return Max(a, b);
456 }
457
458 template <class D>
459 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
460 uint64_t* HWY_RESTRICT /* buf */) const {
461 return MinOfLanes(d, v);
462 }
463
464 template <class D>
465 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
466 uint64_t* HWY_RESTRICT /* buf */) const {
467 return MaxOfLanes(d, v);
468 }
469
470 // Same as for regular lanes.
471 template <class D>
472 HWY_INLINE Vec<D> FirstValue(D d) const {
473 return Set(d, hwy::LowestValue<TFromD<D>>());
474 }
475
476 template <class D>
477 HWY_INLINE Vec<D> LastValue(D d) const {
478 return Set(d, hwy::HighestValue<TFromD<D>>());
479 }
480
481 template <class D>
482 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
483 return Sub(v, Set(d, uint64_t{1} << 32));
484 }
485};
486
487struct OrderDescendingKV64 : public KeyValue64 {
488 using Order = SortDescending;
489 using OrderForSortingNetwork = OrderDescending<LaneType>;
490
491 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
492 return (*b >> 32) < (*a >> 32);
493 }
494
495 template <class D>
496 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
497 return Lt(ShiftRight<32>(b), ShiftRight<32>(a));
498 }
499
500 // Not required to be stable (preserving the order of equivalent keys), so
501 // we can include the value in the comparison.
502 template <class D>
503 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
504 return Max(a, b);
505 }
506
507 template <class D>
508 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
509 return Min(a, b);
510 }
511
512 template <class D>
513 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
514 uint64_t* HWY_RESTRICT /* buf */) const {
515 return MaxOfLanes(d, v);
516 }
517
518 template <class D>
519 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
520 uint64_t* HWY_RESTRICT /* buf */) const {
521 return MinOfLanes(d, v);
522 }
523
524 template <class D>
525 HWY_INLINE Vec<D> FirstValue(D d) const {
526 return Set(d, hwy::HighestValue<TFromD<D>>());
527 }
528
529 template <class D>
530 HWY_INLINE Vec<D> LastValue(D d) const {
531 return Set(d, hwy::LowestValue<TFromD<D>>());
532 }
533
534 template <class D>
535 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
536 return Add(v, Set(d, uint64_t{1} << 32));
537 }
538};
539
540// Shared code that depends on Order.
541template <class Base>
542struct TraitsLane : public Base {
543 using TraitsForSortingNetwork =
544 TraitsLane<typename Base::OrderForSortingNetwork>;
545
546 // For each lane i: replaces a[i] with the first and b[i] with the second
547 // according to Base.
548 // Corresponds to a conditional swap, which is one "node" of a sorting
549 // network. Min/Max are cheaper than compare + blend at least for integers.
550 template <class D>
551 HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
552 const Base* base = static_cast<const Base*>(this);
553
554 const Vec<D> a_copy = a;
555 // Prior to AVX3, there is no native 64-bit Min/Max, so they compile to 4
556 // instructions. We can reduce it to a compare + 2 IfThenElse.
557#if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
558 if (sizeof(TFromD<D>) == 8) {
559 const Mask<D> cmp = base->Compare(d, a, b);
560 a = IfThenElse(cmp, a, b);
561 b = IfThenElse(cmp, b, a_copy);
562 return;
563 }
564#endif
565 a = base->First(d, a, b);
566 b = base->Last(d, a_copy, b);
567 }
568
569 // Conditionally swaps even-numbered lanes with their odd-numbered neighbor.
570 template <class D, HWY_IF_T_SIZE_D(D, 8)>
571 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
572 const Base* base = static_cast<const Base*>(this);
573 Vec<D> swapped = base->ReverseKeys2(d, v);
574 // Further to the above optimization, Sort2+OddEvenKeys compile to four
575 // instructions; we can save one by combining two blends.
576#if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
577 const Vec<D> cmp = VecFromMask(d, base->Compare(d, v, swapped));
578 return IfVecThenElse(DupOdd(cmp), swapped, v);
579#else
580 Sort2(d, v, swapped);
581 return base->OddEvenKeys(swapped, v);
582#endif
583 }
584
585 // (See above - we use Sort2 for non-64-bit types.)
586 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
587 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
588 const Base* base = static_cast<const Base*>(this);
589 Vec<D> swapped = base->ReverseKeys2(d, v);
590 Sort2(d, v, swapped);
591 return base->OddEvenKeys(swapped, v);
592 }
593
594 // Swaps with the vector formed by reversing contiguous groups of 4 keys.
595 template <class D>
596 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
597 const Base* base = static_cast<const Base*>(this);
598 Vec<D> swapped = base->ReverseKeys4(d, v);
599 Sort2(d, v, swapped);
600 return base->OddEvenPairs(d, swapped, v);
601 }
602
603 // Conditionally swaps lane 0 with 4, 1 with 5 etc.
604 template <class D>
605 HWY_INLINE Vec<D> SortPairsDistance4(D d, Vec<D> v) const {
606 const Base* base = static_cast<const Base*>(this);
607 Vec<D> swapped = base->SwapAdjacentQuads(d, v);
608 // Only used in Merge16, so this will not be used on AVX2 (which only has 4
609 // u64 lanes), so skip the above optimization for 64-bit AVX2.
610 Sort2(d, v, swapped);
611 return base->OddEvenQuads(d, swapped, v);
612 }
613};
614
615#else
616
617template <typename T>
618struct OrderAscending : public KeyLaneBase<T, T> {
620
621 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *a < *b; }
622
623 template <class D>
625 return Lt(a, b);
626 }
627
628 template <class D>
630 return LargestSortValue(d);
631 }
632};
633
634template <typename T>
635struct OrderDescending : public KeyLaneBase<T, T> {
637
638 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *b < *a; }
639
640 template <class D>
642 return Lt(b, a);
643 }
644
645 template <class D>
647 return SmallestSortValue(d);
648 }
649};
650
651template <class Order>
652struct TraitsLane : public Order {
653 // For HeapSort
654 template <typename T> // MSVC doesn't find typename Order::LaneType.
655 HWY_INLINE void Swap(T* a, T* b) const {
656 const T temp = *a;
657 *a = *b;
658 *b = temp;
659 }
660
661 template <class D>
662 HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
663 return Set(d, *key);
664 }
665};
666
667#endif // VQSORT_ENABLED
668
669} // namespace detail
670// NOLINTNEXTLINE(google-readability-namespace-comments)
671} // namespace HWY_NAMESPACE
672} // namespace hwy
674
675#endif // HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
#define HWY_RESTRICT
Definition base.h:95
#define HWY_INLINE
Definition base.h:101
#define HWY_DASSERT(condition)
Definition base.h:290
HWY_API Vec128< T, N > Neg(hwy::NonFloatTag, Vec128< T, N > v)
Definition emu128-inl.h:744
HWY_INLINE Vec128< T, N > Max(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:689
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:560
Vec< D > SmallerSortValue(D d, Vec< D > v)
Definition traits-inl.h:115
HWY_INLINE Vec128< T, N > Min(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:681
HWY_INLINE bool AllFalse(hwy::SizeTag< 1 >, const Mask256< T > mask)
Definition x86_256-inl.h:7076
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:570
Vec< D > LargestSortValue(D d)
Definition traits-inl.h:69
Vec< D > LargerSortValue(D d, Vec< D > v)
Definition traits-inl.h:88
HWY_INLINE Vec128< T, N > IfThenElse(hwy::SizeTag< 1 >, Mask128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition x86_128-inl.h:1269
HWY_INLINE Vec128< T, N > CompressNot(Vec128< T, N > v, uint64_t mask_bits)
Definition arm_neon-inl.h:8860
Vec< D > SmallestSortValue(D d)
Definition traits-inl.h:78
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:7156
HWY_API VFromD< D > VecFromMask(D d, const MFromD< D > m)
Definition arm_neon-inl.h:2960
HWY_API Vec128< T, N > DupOdd(Vec128< T, N > v)
Definition arm_neon-inl.h:7091
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7339
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7331
HWY_API Mask128< T, N > IsNaN(const Vec128< T, N > v)
Definition arm_neon-inl.h:5093
D d
Definition arm_sve-inl.h:1915
HWY_API VFromD< D > BitCast(D d, Vec128< FromT, Repartition< FromT, D >().MaxLanes()> v)
Definition arm_neon-inl.h:1581
HWY_API VFromD< D > MaxOfLanes(D d, VFromD< D > v)
Definition arm_sve-inl.h:3228
HWY_API VFromD< D > ConcatLowerUpper(D d, VFromD< D > hi, VFromD< D > lo)
Definition arm_neon-inl.h:6965
HWY_API Vec128< uint64_t, N > Max(Vec128< uint64_t, N > a, Vec128< uint64_t, N > b)
Definition arm_neon-inl.h:3377
HWY_API Vec128< T > Shuffle1032(Vec128< T > v)
Definition arm_neon-inl.h:6008
HWY_API VFromD< D > Zero(D d)
Definition arm_neon-inl.h:947
HWY_API Vec128< T, 1 > Reverse(D, Vec128< T, 1 > v)
Definition arm_neon-inl.h:5959
HWY_API Vec128< uint64_t, N > Min(Vec128< uint64_t, N > a, Vec128< uint64_t, N > b)
Definition arm_neon-inl.h:3311
HWY_API Vec128< int64_t > Abs(const Vec128< int64_t > v)
Definition arm_neon-inl.h:3271
typename D::T TFromD
Definition ops/shared-inl.h:426
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2785
HWY_API VFromD< D > MinOfLanes(D d, VFromD< D > v)
Definition arm_sve-inl.h:3224
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:7162
HWY_API VFromD< D > ConcatUpperLower(D d, VFromD< D > hi, VFromD< D > lo)
Definition arm_neon-inl.h:6989
HWY_API Vec64< uint32_t > Shuffle2301(const Vec64< uint32_t > v)
Definition arm_neon-inl.h:3084
HWY_API Vec< D > SignBit(D d)
Definition generic_ops-inl.h:75
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition ops/shared-inl.h:465
HWY_API Vec< D > Inf(D d)
Definition generic_ops-inl.h:91
HWY_API bool AllTrue(D d, Mask128< T > m)
Definition arm_neon-inl.h:8416
HWY_API svbool_t Gt(const V a, const V b)
Definition arm_sve-inl.h:1578
decltype(MaskFromVec(Zero(D()))) Mask
Definition generic_ops-inl.h:52
HWY_API MFromD< DTo > RebindMask(DTo, Mask128< TFrom, NFrom > m)
Definition arm_neon-inl.h:2969
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
HWY_API VFromD< D > Reverse8(D d, VFromD< D > v)
Definition arm_neon-inl.h:5935
HWY_API auto Le(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7353
HWY_API VFromD< D > Reverse4(D d, VFromD< D > v)
Definition arm_neon-inl.h:5900
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:7107
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API auto Ne(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7335
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:39
HWY_API svbool_t IsFinite(const V v)
Definition arm_sve-inl.h:1725
HWY_API VFromD< D > Reverse2(D d, VFromD< D > v)
Definition arm_neon-inl.h:5860
Definition abort.h:8
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T LowestValue()
Definition base.h:2191
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T HighestValue()
Definition base.h:2212
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
Definition traits-inl.h:39
KeyTypeArg KeyType
Definition traits-inl.h:46
static constexpr bool Is128()
Definition traits-inl.h:40
constexpr size_t LanesPerKey() const
Definition traits-inl.h:41
LaneTypeArg LaneType
Definition traits-inl.h:44
const char * KeyString() const
Definition traits-inl.h:48
HWY_INLINE Vec< D > LastValue(D d) const
Definition traits-inl.h:629
HWY_INLINE bool Compare1(const T *a, const T *b) const
Definition traits-inl.h:621
HWY_INLINE Mask< D > Compare(D, Vec< D > a, Vec< D > b)
Definition traits-inl.h:624
HWY_INLINE bool Compare1(const T *a, const T *b) const
Definition traits-inl.h:638
HWY_INLINE Mask< D > Compare(D, Vec< D > a, Vec< D > b)
Definition traits-inl.h:641
HWY_INLINE Vec< D > LastValue(D d) const
Definition traits-inl.h:646
Definition traits-inl.h:652
HWY_INLINE void Swap(T *a, T *b) const
Definition traits-inl.h:655
HWY_INLINE Vec< D > SetKey(D d, const TFromD< D > *key) const
Definition traits-inl.h:662
Definition order.h:25
Definition order.h:28
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()