Grok 12.0.1
traits128-inl.h
Go to the documentation of this file.
1// Copyright 2021 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Per-target
17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
21#else
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
23#endif
24
25#include <stddef.h>
26#include <stdint.h>
27
28#include "hwy/contrib/sort/order.h" // SortDescending
30#include "hwy/highway.h"
31
33namespace hwy {
34namespace HWY_NAMESPACE {
35namespace detail {
36
37#if VQSORT_ENABLED || HWY_IDE
38
39// Highway does not provide a lane type for 128-bit keys, so we use uint64_t
40// along with an abstraction layer for single-lane vs. lane-pair, which is
41// independent of the order.
42struct KeyAny128 {
43 static constexpr bool Is128() { return true; }
44 constexpr size_t LanesPerKey() const { return 2; }
45
46 // What type bench_sort should allocate for generating inputs.
47 using LaneType = uint64_t;
48 // KeyType and KeyString are defined by derived classes.
49
50 HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
51 const FixedTag<LaneType, 2> d;
52 const auto temp = LoadU(d, a);
53 StoreU(LoadU(d, b), d, a);
54 StoreU(temp, d, b);
55 }
56
57 template <class V, class M>
58 HWY_INLINE V CompressKeys(V keys, M mask) const {
59 return CompressBlocksNot(keys, mask);
60 }
61
62 template <class D>
63 HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
64 return LoadDup128(d, key);
65 }
66
67 template <class D>
68 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
69 return ReverseBlocks(d, v);
70 }
71
72 template <class D>
73 HWY_INLINE Vec<D> ReverseKeys2(D /* tag */, const Vec<D> v) const {
74 return SwapAdjacentBlocks(v);
75 }
76
77 // Only called for 4 keys because we do not support >512-bit vectors.
78 template <class D>
79 HWY_INLINE Vec<D> ReverseKeys4(D d, const Vec<D> v) const {
80 HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
81 return ReverseKeys(d, v);
82 }
83
84 // Only called for 4 keys because we do not support >512-bit vectors.
85 template <class D>
86 HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
87 const Vec<D> even) const {
88 HWY_DASSERT(Lanes(d) <= 64 / sizeof(TFromD<D>));
89 return ConcatUpperLower(d, odd, even);
90 }
91
92 template <class V>
93 HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
94 return OddEvenBlocks(odd, even);
95 }
96
97 template <class D>
98 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>) const {
99 HWY_ASSERT(0); // not supported: would require 1024-bit vectors
100 }
101
102 template <class D>
103 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>) const {
104 HWY_ASSERT(0); // not supported: would require 2048-bit vectors
105 }
106
107 // This is only called for 8/16 col networks (not supported).
108 template <class D>
109 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>) const {
110 HWY_ASSERT(0);
111 }
112
113 // This is only called for 16 col networks (not supported).
114 template <class D>
115 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>) const {
116 HWY_ASSERT(0);
117 }
118
119 // This is only called for 8 col networks (not supported).
120 template <class D>
121 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>) const {
122 HWY_ASSERT(0);
123 }
124};
125
126// Base class shared between OrderAscending128, OrderDescending128.
127struct Key128 : public KeyAny128 {
128 // False indicates the entire key should be compared. KV means key-value.
129 static constexpr bool IsKV() { return false; }
130
131 // What type to pass to VQSort.
132 using KeyType = hwy::uint128_t;
133
134 const char* KeyString() const { return "U128"; }
135
136 template <class D>
137 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
138 return Eq128(d, a, b);
139 }
140
141 template <class D>
142 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
143 return Ne128(d, a, b);
144 }
145
146 // For keys=entire 128 bits, any difference counts.
147 template <class D>
148 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
149 // Must avoid floating-point comparisons (for -0)
150 const RebindToUnsigned<D> du;
151 return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
152 }
153
154 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
155 return a[0] == b[0] && a[1] == b[1];
156 }
157
158 // Returns vector with only the top half of each block valid. This allows
159 // fusing the "replicate upper to lower half" step with a subsequent permute.
160 template <class Order, class D>
161 HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
162 const Mask<D> eqHL = Eq(a, b);
163 const Vec<D> ltHL = VecFromMask(d, Order().CompareLanes(a, b));
164#if HWY_TARGET <= HWY_AVX2 // slightly faster
165 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
166 return OrAnd(ltHL, VecFromMask(d, eqHL), ltLX);
167#else
168 return IfThenElse(eqHL, DupEven(ltHL), ltHL);
169#endif
170 }
171};
172
173// Anything order-related depends on the key traits *and* the order (see
174// FirstOfLanes). We cannot implement just one Compare function because Lt128
175// only compiles if the lane type is u64. Thus we need either overloaded
176// functions with a tag type, class specializations, or separate classes.
177// We avoid overloaded functions because we want all functions to be callable
178// from a SortTraits without per-function wrappers. Specializing would work, but
179// we are anyway going to specialize at a higher level.
180struct OrderAscending128 : public Key128 {
181 using Order = SortAscending;
182 using OrderForSortingNetwork = OrderAscending128;
183
184 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
185 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
186 }
187
188 template <class D>
189 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
190 return Lt128(d, a, b);
191 }
192
193 // Used by CompareTop
194 template <class V>
195 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
196 return Lt(a, b);
197 }
198
199 template <class D>
200 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
201 return Min128(d, a, b);
202 }
203
204 template <class D>
205 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
206 return Max128(d, a, b);
207 }
208
209 // Same as for regular lanes because 128-bit keys are u64.
210 template <class D>
211 HWY_INLINE Vec<D> FirstValue(D d) const {
212 return Set(d, hwy::LowestValue<TFromD<D> >());
213 }
214
215 template <class D>
216 HWY_INLINE Vec<D> LastValue(D d) const {
217 return Set(d, hwy::HighestValue<TFromD<D> >());
218 }
219
220 template <class D>
221 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
222 const Vec<D> k0 = Zero(d);
223 const Vec<D> k1 = OddEven(k0, Set(d, uint64_t{1}));
224 const Mask<D> borrow = Eq(v, k0); // don't-care, lo == 0
225 // lo == 0? 1 : 0, 0
226 const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(borrow, k1));
227 return Sub(Sub(v, k1), adjust);
228 }
229};
230
231struct OrderDescending128 : public Key128 {
232 using Order = SortDescending;
233 using OrderForSortingNetwork = OrderDescending128;
234
235 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
236 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
237 }
238
239 template <class D>
240 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
241 return Lt128(d, b, a);
242 }
243
244 // Used by CompareTop
245 template <class V>
246 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
247 return Lt(b, a);
248 }
249
250 template <class D>
251 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
252 return Max128(d, a, b);
253 }
254
255 template <class D>
256 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
257 return Min128(d, a, b);
258 }
259
260 // Same as for regular lanes because 128-bit keys are u64.
261 template <class D>
262 HWY_INLINE Vec<D> FirstValue(D d) const {
263 return Set(d, hwy::HighestValue<TFromD<D> >());
264 }
265
266 template <class D>
267 HWY_INLINE Vec<D> LastValue(D d) const {
268 return Set(d, hwy::LowestValue<TFromD<D> >());
269 }
270
271 template <class D>
272 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
273 const Vec<D> k1 = OddEven(Zero(d), Set(d, uint64_t{1}));
274 const Vec<D> added = Add(v, k1);
275 const Mask<D> overflowed = Lt(added, v); // false, overflowed
276 // overflowed? 1 : 0, 0
277 const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(overflowed, k1));
278 return Add(added, adjust);
279 }
280};
281
282// Base class shared between OrderAscendingKV128, OrderDescendingKV128.
283struct KeyValue128 : public KeyAny128 {
284 // True indicates only part of the key (the more significant lane) should be
285 // compared. KV stands for key-value.
286 static constexpr bool IsKV() { return true; }
287
288 // What type to pass to VQSort.
289 using KeyType = K64V64;
290
291 const char* KeyString() const { return "k+v=128"; }
292
293 template <class D>
294 HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
295 return Eq128Upper(d, a, b);
296 }
297
298 template <class D>
299 HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
300 return Ne128Upper(d, a, b);
301 }
302
303 // Only count differences in the actual key, not the value.
304 template <class D>
305 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
306 // Must avoid floating-point comparisons (for -0)
307 const RebindToUnsigned<D> du;
308 const Vec<decltype(du)> zero = Zero(du);
309 const Vec<decltype(du)> keys = OddEven(diff, zero); // clear values
310 return AllTrue(du, Eq(BitCast(du, keys), zero));
311 }
312
313 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
314 return a[1] == b[1];
315 }
316
317 // Returns vector with only the top half of each block valid. This allows
318 // fusing the "replicate upper to lower half" step with a subsequent permute.
319 template <class Order, class D>
320 HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
321 // Only the upper lane of each block is a key, and only that lane is
322 // required to be valid, so comparing all lanes is sufficient.
323 return VecFromMask(d, Order().CompareLanes(a, b));
324 }
325};
326
327struct OrderAscendingKV128 : public KeyValue128 {
328 using Order = SortAscending;
329 using OrderForSortingNetwork = OrderAscending128;
330
331 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
332 return a[1] < b[1];
333 }
334
335 template <class D>
336 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
337 return Lt128Upper(d, a, b);
338 }
339
340 // Used by CompareTop
341 template <class V>
342 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
343 return Lt(a, b);
344 }
345
346 template <class D>
347 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
348 return Min128Upper(d, a, b);
349 }
350
351 template <class D>
352 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
353 return Max128Upper(d, a, b);
354 }
355
356 // Same as for regular lanes because 128-bit keys are u64.
357 template <class D>
358 HWY_INLINE Vec<D> FirstValue(D d) const {
359 return Set(d, hwy::LowestValue<TFromD<D> >());
360 }
361
362 template <class D>
363 HWY_INLINE Vec<D> LastValue(D d) const {
364 return Set(d, hwy::HighestValue<TFromD<D> >());
365 }
366
367 template <class D>
368 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
369 const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
370 return Sub(v, k1);
371 }
372};
373
374struct OrderDescendingKV128 : public KeyValue128 {
375 using Order = SortDescending;
376 using OrderForSortingNetwork = OrderDescending128;
377
378 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
379 return b[1] < a[1];
380 }
381
382 template <class D>
383 HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
384 return Lt128Upper(d, b, a);
385 }
386
387 // Used by CompareTop
388 template <class V>
389 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
390 return Lt(b, a);
391 }
392
393 template <class D>
394 HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
395 return Max128Upper(d, a, b);
396 }
397
398 template <class D>
399 HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
400 return Min128Upper(d, a, b);
401 }
402
403 // Same as for regular lanes because 128-bit keys are u64.
404 template <class D>
405 HWY_INLINE Vec<D> FirstValue(D d) const {
406 return Set(d, hwy::HighestValue<TFromD<D> >());
407 }
408
409 template <class D>
410 HWY_INLINE Vec<D> LastValue(D d) const {
411 return Set(d, hwy::LowestValue<TFromD<D> >());
412 }
413
414 template <class D>
415 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
416 const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
417 return Add(v, k1);
418 }
419};
420
421// We want to swap 2 u128, i.e. 4 u64 lanes, based on the 0 or FF..FF mask in
422// the most-significant of those lanes (the result of CompareTop), so
423// replicate it 4x. Only called for >= 256-bit vectors.
424
425#if HWY_TARGET <= HWY_AVX3
426template <class V, HWY_IF_V_SIZE_V(V, 64)>
427HWY_INLINE V ReplicateTop4x(V v) {
428 return V{_mm512_permutex_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
429}
430#endif // HWY_TARGET <= HWY_AVX3
431
432#if HWY_TARGET <= HWY_AVX2
433
434template <class V, HWY_IF_V_SIZE_V(V, 32)>
435HWY_INLINE V ReplicateTop4x(V v) {
436 return V{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
437}
438
439#else // HWY_TARGET > HWY_AVX2
440
441template <class V>
442HWY_INLINE V ReplicateTop4x(V v) {
443#if HWY_TARGET == HWY_SVE_256
444 return svdup_lane_u64(v, 3);
445#else
446 HWY_ALIGN static constexpr uint64_t kIndices[8] = {3, 3, 3, 3, 7, 7, 7, 7};
447 const ScalableTag<uint64_t> d;
448 return TableLookupLanes(v, SetTableIndices(d, kIndices));
449#endif
450}
451
452#endif // HWY_TARGET <= HWY_AVX2
453
454// Shared code that depends on Order.
455template <class Base>
456struct Traits128 : public Base {
457 using TraitsForSortingNetwork =
458 Traits128<typename Base::OrderForSortingNetwork>;
459
460 template <class D>
461 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
462 TFromD<D>* HWY_RESTRICT buf) const {
463 const Base* base = static_cast<const Base*>(this);
464 const size_t N = Lanes(d);
465 Store(v, d, buf);
466 v = base->SetKey(d, buf + 0); // result must be broadcasted
467 for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
468 v = base->First(d, v, base->SetKey(d, buf + i));
469 }
470 return v;
471 }
472
473 template <class D>
474 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
475 TFromD<D>* HWY_RESTRICT buf) const {
476 const Base* base = static_cast<const Base*>(this);
477 const size_t N = Lanes(d);
478 Store(v, d, buf);
479 v = base->SetKey(d, buf + 0); // result must be broadcasted
480 for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
481 v = base->Last(d, v, base->SetKey(d, buf + i));
482 }
483 return v;
484 }
485
486 template <class D>
487 HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
488 const Base* base = static_cast<const Base*>(this);
489
490 const Vec<D> a_copy = a;
491 const auto lt = base->Compare(d, a, b);
492 a = IfThenElse(lt, a, b);
493 b = IfThenElse(lt, b, a_copy);
494 }
495
496 // Conditionally swaps even-numbered keys with their odd-numbered neighbor.
497 template <class D>
498 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
499 const Base* base = static_cast<const Base*>(this);
500 Vec<D> swapped = base->ReverseKeys2(d, v);
501 const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
502 return IfVecThenElse(ReplicateTop4x(cmpHx), swapped, v);
503 }
504
505 // Swaps with the vector formed by reversing contiguous groups of four 128-bit
506 // keys, which implies 512-bit vectors (we do not support more than that).
507 template <class D>
508 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
509 const Base* base = static_cast<const Base*>(this);
510 Vec<D> swapped = base->ReverseKeys4(d, v);
511
512 const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
513 // Similar to ReplicateTop4x, we want to gang together 2 comparison results
514 // (4 lanes). They are not contiguous, so use permute to replicate 4x.
515 HWY_ALIGN uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
516 const Vec<D> select = TableLookupLanes(cmpHx, SetTableIndices(d, kIndices));
517 return IfVecThenElse(select, swapped, v);
518 }
519
520 // Conditionally swaps lane 0 with 4, 1 with 5 etc.
521 template <class D>
522 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>) const {
523 // Only used by Merge16, which would require 2048 bit vectors (unsupported).
524 HWY_ASSERT(0);
525 }
526};
527
528#endif // VQSORT_ENABLED
529
530} // namespace detail
531// NOLINTNEXTLINE(google-readability-namespace-comments)
532} // namespace HWY_NAMESPACE
533} // namespace hwy
535
536#endif // HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
#define HWY_RESTRICT
Definition base.h:95
#define HWY_INLINE
Definition base.h:101
#define HWY_DASSERT(condition)
Definition base.h:290
#define HWY_MAYBE_UNUSED
Definition base.h:113
#define HWY_ASSERT(condition)
Definition base.h:237
HWY_INLINE Vec128< T, N > Add(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:560
HWY_INLINE Vec128< T, N > Sub(hwy::NonFloatTag, Vec128< T, N > a, Vec128< T, N > b)
Definition emu128-inl.h:570
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition arm_neon-inl.h:7156
HWY_API VFromD< D > VecFromMask(D d, const MFromD< D > m)
Definition arm_neon-inl.h:2960
HWY_INLINE VFromD< D > Max128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9480
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7339
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7331
D d
Definition arm_sve-inl.h:1915
HWY_INLINE VFromD< D > Max128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9490
HWY_API V IfThenElse(MFromD< DFromV< V > > mask, V yes, V no)
Definition arm_neon-inl.h:2992
HWY_API VFromD< D > BitCast(D d, Vec128< FromT, Repartition< FromT, D >().MaxLanes()> v)
Definition arm_neon-inl.h:1581
HWY_API void Store(VFromD< D > v, D d, TFromD< D > *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:3911
HWY_API Vec128< uint8_t > LoadU(D, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3442
HWY_INLINE MFromD< D > Ne128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9466
HWY_INLINE MFromD< D > Lt128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9436
HWY_API VFromD< D > Zero(D d)
Definition arm_neon-inl.h:947
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition arm_neon-inl.h:8924
HWY_API void StoreU(Vec128< uint8_t > v, D, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3689
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition arm_neon-inl.h:2785
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition arm_neon-inl.h:7074
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition arm_neon-inl.h:5775
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition arm_neon-inl.h:7162
HWY_API VFromD< D > ConcatUpperLower(D d, VFromD< D > hi, VFromD< D > lo)
Definition arm_neon-inl.h:6989
HWY_INLINE MFromD< D > Eq128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9444
HWY_API VFromD< D > ReverseBlocks(D, VFromD< D > v)
Definition arm_neon-inl.h:7169
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition arm_neon-inl.h:2779
HWY_API VFromD< D > LoadDup128(D d, const TFromD< D > *HWY_RESTRICT p)
Definition arm_neon-inl.h:3682
HWY_API bool AllTrue(D d, Mask128< T > m)
Definition arm_neon-inl.h:8416
HWY_API Indices128< TFromD< D >, MaxLanes(D())> SetTableIndices(D d, const TI *idx)
Definition arm_neon-inl.h:5768
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
HWY_INLINE MFromD< D > Lt128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9409
HWY_INLINE VFromD< D > Min128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9475
HWY_API Vec128< T, N > IfThenElseZero(Mask128< T, N > mask, Vec128< T, N > yes)
Definition arm_neon-inl.h:3007
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:7107
HWY_INLINE MFromD< D > Eq128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9451
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
HWY_INLINE MFromD< D > Ne128(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9459
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:39
HWY_INLINE VFromD< D > Min128Upper(D d, VFromD< D > a, VFromD< D > b)
Definition arm_neon-inl.h:9485
Definition abort.h:8
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T LowestValue()
Definition base.h:2191
HWY_API HWY_BITCASTSCALAR_CONSTEXPR T HighestValue()
Definition base.h:2212
#define HWY_ALIGN
Definition set_macros-inl.h:167
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
Definition base.h:412
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()