Grok 12.0.1
test_util-inl.h
Go to the documentation of this file.
1// Copyright 2019 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Target-specific helper functions for use by *_test.cc.
17
18#include <stdio.h>
19#include <string.h> // memset
20
21// IWYU pragma: begin_exports
23#include "hwy/base.h"
24#include "hwy/detect_targets.h"
25#include "hwy/per_target.h"
26#include "hwy/targets.h"
27#include "hwy/tests/hwy_gtest.h"
28#include "hwy/tests/test_util.h"
29// IWYU pragma: end_exports
30
31// After test_util (also includes highway.h)
32#include "hwy/print-inl.h"
33
34// Per-target include guard
35// clang-format off
36#if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE) // NOLINT
37// clang-format on
38#ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
39#undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
40#else
41#define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
42#endif
43
45namespace hwy {
46namespace HWY_NAMESPACE {
47
48// Like Iota, but avoids wrapping around to negative integers.
49template <class D, HWY_IF_FLOAT_D(D)>
51 return Iota(d, 1);
52}
53template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
54HWY_INLINE Vec<D> PositiveIota(D d) {
55 const auto vi = Iota(d, 1);
56 return Max(And(vi, Set(d, LimitsMax<TFromD<D>>())),
57 Set(d, static_cast<TFromD<D>>(1)));
58}
59
60// Same as Iota, but supports bf16. This is possibly too expensive for general
61// use, but fine for tests.
62template <class D, typename First, HWY_IF_NOT_SPECIAL_FLOAT_D(D)>
63VFromD<D> IotaForSpecial(D d, First first) {
64 return Iota(d, first);
65}
66#if HWY_HAVE_FLOAT16
67template <class D, typename First, HWY_IF_F16_D(D), HWY_IF_LANES_GT_D(D, 1)>
68VFromD<D> IotaForSpecial(D d, First first) {
69 return Iota(d, first);
70}
71#else // !HWY_HAVE_FLOAT16
72template <class D, typename First, HWY_IF_F16_D(D), HWY_IF_LANES_GT_D(D, 1),
73 HWY_IF_POW2_GT_D(D, -1)>
74VFromD<D> IotaForSpecial(D d, First first) {
75 const Repartition<float, D> df;
76 const size_t NW = Lanes(d) / 2;
77 const Half<D> dh;
78 const float first2 = static_cast<float>(first) + static_cast<float>(NW);
79 return Combine(d, DemoteTo(dh, Iota(df, first2)),
80 DemoteTo(dh, Iota(df, first)));
81 // TODO(janwas): enable when supported for f16
82 // return OrderedDemote2To(d, Iota(df, first), Iota(df, first + NW));
83}
84// For partial vectors, a single f32 vector is enough, and the prior overload
85// might not be able to Repartition.
86template <class D, typename First, HWY_IF_F16_D(D), HWY_IF_LANES_GT_D(D, 1),
87 HWY_IF_POW2_LE_D(D, -1)>
88VFromD<D> IotaForSpecial(D d, First first) {
89 const Rebind<float, D> df;
90 return DemoteTo(d, Iota(df, first));
91}
92#endif // HWY_HAVE_FLOAT16
93template <class D, typename First, HWY_IF_BF16_D(D), HWY_IF_LANES_GT_D(D, 1),
94 HWY_IF_POW2_GT_D(D, -1)>
95VFromD<D> IotaForSpecial(D d, First first) {
96 const Repartition<float, D> df;
97 const float first1 = ConvertScalarTo<float>(first);
98 const float first2 = first1 + static_cast<float>(Lanes(d) / 2);
99 return OrderedDemote2To(d, Iota(df, first1), Iota(df, first2));
100}
101// For partial vectors, a single f32 vector is enough, and the prior overload
102// might not be able to Repartition.
103template <class D, typename First, HWY_IF_BF16_D(D), HWY_IF_LANES_GT_D(D, 1),
104 HWY_IF_POW2_LE_D(D, -1)>
105VFromD<D> IotaForSpecial(D d, First first) {
106 const Rebind<float, D> df;
107 return DemoteTo(d, Iota(df, first));
108}
109// OrderedDemote2To does not work for single lanes, so special-case that.
110template <class D, typename First, HWY_IF_SPECIAL_FLOAT_D(D),
111 HWY_IF_LANES_D(D, 1)>
112VFromD<D> IotaForSpecial(D d, First first) {
113 const Rebind<float, D> df;
114 return DemoteTo(d, Set(df, static_cast<float>(first)));
115}
116
117// Compare expected array to vector.
118// TODO(b/287462770): inline to work around incorrect SVE codegen.
119template <class D, typename T = TFromD<D>>
120HWY_INLINE void AssertVecEqual(D d, const T* expected, Vec<D> actual,
121 const char* filename, const int line) {
122 const size_t N = Lanes(d);
123 auto actual_lanes = AllocateAligned<T>(N);
124 Store(actual, d, actual_lanes.get());
125
126 const auto info = hwy::detail::MakeTypeInfo<T>();
127 const char* target_name = hwy::TargetName(HWY_TARGET);
128 hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
129 target_name, filename, line);
130}
131
132// Compare expected vector to vector.
133// TODO(b/287462770): inline to work around incorrect SVE codegen.
134template <class D, typename T = TFromD<D>>
135HWY_INLINE void AssertVecEqual(D d, Vec<D> expected, Vec<D> actual,
136 const char* filename, int line) {
137 const size_t N = Lanes(d);
138 auto expected_lanes = AllocateAligned<T>(N);
139 auto actual_lanes = AllocateAligned<T>(N);
140 Store(expected, d, expected_lanes.get());
141 Store(actual, d, actual_lanes.get());
142
143 const auto info = hwy::detail::MakeTypeInfo<T>();
144 const char* target_name = hwy::TargetName(HWY_TARGET);
145 hwy::detail::AssertArrayEqual(info, expected_lanes.get(), actual_lanes.get(),
146 N, target_name, filename, line);
147}
148
149// Only checks the valid mask elements (those whose index < Lanes(d)).
// Compares masks `a` and `b` three ways: as vectors (VecFromMask), via the
// CountTrue/AllTrue/AllFalse summaries, and via the packed bits written by
// StoreMaskBits. Aborts with "Masks not equal" if the packed bits differ.
// NOTE(review): this listing jumps from line 150 to 152 and from 166 to 168, so
// the line naming the function and its leading parameters, and the non-scalar
// definition of `d8`, are not visible here.
150template <class D>
152 const char* filename, int line) {
153 // lvalues prevented MSAN failure in farm_sve.
154 const Vec<D> va = VecFromMask(d, a);
155 const Vec<D> vb = VecFromMask(d, b);
156 AssertVecEqual(d, va, vb, filename, line);
157
158 const char* target_name = hwy::TargetName(HWY_TARGET);
159 AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
160 AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
161 AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
162
163 const size_t N = Lanes(d);
164#if HWY_TARGET == HWY_SCALAR
165 const Rebind<uint8_t, D> d8;
166#else
168#endif
169 const size_t N8 = Lanes(d8);
 // Each buffer holds at least 8 bytes; only the first N8 bytes are zeroed.
170 auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(size_t{8}, N8));
171 auto bits_b = AllocateAligned<uint8_t>(size_t{HWY_MAX(8, N8)});
172 memset(bits_a.get(), 0, N8);
173 memset(bits_b.get(), 0, N8);
174 const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
175 const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
176 AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
177 size_t i = 0;
178 // First check whole bytes (if that many elements are still valid)
179 for (; i < N / 8; ++i) {
180 if (bits_a[i] != bits_b[i]) {
181 fprintf(stderr, "Mismatch in byte %d: %d != %d\n", static_cast<int>(i),
182 bits_a[i], bits_b[i]);
183 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
184 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
185 hwy::Abort(filename, line, "Masks not equal");
186 }
187 }
188 // Then the valid bit(s) in the last byte.
189 const size_t remainder = N % 8;
190 if (remainder != 0) {
 // Keep only the low `remainder` bits of byte i (i == N / 8 after the loop).
191 const int mask = (1 << remainder) - 1;
192 const int valid_a = bits_a[i] & mask;
193 const int valid_b = bits_b[i] & mask;
194 if (valid_a != valid_b) {
195 fprintf(stderr, "Mismatch in last byte %d: %d != %d\n",
196 static_cast<int>(i), valid_a, valid_b);
197 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
198 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
199 hwy::Abort(filename, line, "Masks not equal");
200 }
201 }
202}
203
204// Only sets valid elements (those whose index < Lanes(d)). This helps catch
205// tests that are not masking off the (undefined) upper mask elements.
206//
207// TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
208template <class D>
210 return FirstN(d, Lanes(d));
211}
212
213// MaskFalse is now implemented in x86_128-inl.h on AVX3, arm_sve-inl.h on SVE,
214// rvv-inl.h on RVV, and generic_ops-inl.h on all other targets
215
// Assertion macros for tests. Each forwards its arguments together with
// __FILE__ and __LINE__ so that a failure reports the call site. The whole
// group is skipped if HWY_ASSERT_EQ is already defined.
216#ifndef HWY_ASSERT_EQ
217
// Forwards to hwy::AssertEqual, adding the name of the current target.
218#define HWY_ASSERT_EQ(expected, actual) \
219 hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__, \
220 __LINE__)
221
// Forwards to hwy::AssertArrayEqual for `count` elements.
222#define HWY_ASSERT_ARRAY_EQ(expected, actual, count) \
223 hwy::AssertArrayEqual(expected, actual, count, hwy::TargetName(HWY_TARGET), \
224 __FILE__, __LINE__)
225
// Forwards to hwy::AssertStringEqual.
226#define HWY_ASSERT_STRING_EQ(expected, actual) \
227 hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
228 __FILE__, __LINE__)
229
// Forwards to AssertVecEqual (unqualified, so it resolves at the use site).
230#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
231 AssertVecEqual(d, expected, actual, __FILE__, __LINE__)
232
// Forwards to AssertMaskEqual (unqualified, so it resolves at the use site).
233#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
234 AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)
235
236#endif // HWY_ASSERT_EQ
237
238namespace detail {
239
240// Helpers for instantiating tests with combinations of lane types / counts.
241
242// Calls Test for each CappedTag<T, N> where N is in [kMinLanes, kMul * kMinArg]
243// and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
244// is required to ensure capped vectors remain extendable. Implemented by
245// recursively halving kMul until it is zero.
// NOTE(review): this listing skips lines 247 and 249, so the struct header and
// the declaration of the tag `d` used below are not visible here.
246template <typename T, size_t kMul, size_t kMinArg, class Test, int kPow2 = 0>
248 static void Do(size_t min_lanes, size_t max_lanes) {
250
251 // If we already don't have enough lanes, stop.
252 const size_t lanes = Lanes(d);
253 if (lanes < min_lanes) return;
254
 // Run the test only if the lane count does not exceed max_lanes; in either
 // case continue with half the multiplier.
255 if (lanes <= max_lanes) {
256 Test()(T(), d);
257 }
258 ForeachCappedR<T, kMul / 2, kMinArg, Test, kPow2>::Do(min_lanes, max_lanes);
259 }
260};
261
262// Base case to stop the recursion.
263template <typename T, size_t kMinArg, class Test, int kPow2>
264struct ForeachCappedR<T, 0, kMinArg, Test, kPow2> {
265 static void Do(size_t, size_t) {}
266};
267
268#if HWY_HAVE_SCALABLE
269
270template <typename T>
271constexpr int MinPow2() {
272 // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
273 // as kPow2 == -3). The fraction also must not result in zero lanes for the
274 // smallest possible vector size, which is 128 bits even on RISC-V (with the
275 // application processor profile).
276 return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
277}
278
// Returns the largest kPow2 supported by the current target.
constexpr int MaxPow2() {
#if HWY_TARGET != HWY_RVV
  // Every platform other than RVV cannot exceed a full vector.
  return 0;
#else
  // Only RVV allows multiple vector registers.
  return 3;  // LMUL=8
#endif
}
288
289// Iterates kPow2 up to and including kMaxPow2. Below we specialize for
290// valid=false to stop the iteration. The ForeachPow2Trim enables shorter
291// argument lists, but use ForeachPow2 when you want to specify the actual min.
292template <typename T, int kPow2, int kMaxPow2, bool valid, class Test>
293struct ForeachPow2 {
294 static void Do(size_t min_lanes) {
295 const ScalableTag<T, kPow2> d;
296
297 static_assert(MinPow2<T>() <= kPow2 && kPow2 <= MaxPow2(), "");
298 if (Lanes(d) >= min_lanes) {
299 Test()(T(), d);
300 } else {
301 fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
302 static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
303 static_cast<int>(sizeof(T)), kPow2);
304 HWY_ASSERT(min_lanes != 1);
305 }
306
307 ForeachPow2<T, kPow2 + 1, kMaxPow2, (kPow2 + 1) <= kMaxPow2, Test>::Do(
308 min_lanes);
309 }
310};
311
312// Base case to stop the iteration.
313template <typename T, int kPow2, int kMaxPow2, class Test>
314struct ForeachPow2<T, kPow2, kMaxPow2, /*valid=*/false, Test> {
315 static void Do(size_t) {}
316};
317
318// Iterates kPow2 over [MinPow2<T>() + kAddMin, MaxPow2() - kSubMax].
319// This is a wrapper that shortens argument lists, allowing users to skip the
320// MinPow2 and MaxPow2. Nonzero kAddMin implies a minimum LMUL, and nonzero
321// kSubMax reduces the maximum LMUL (e.g. for type promotions, where the result
322// is larger, thus the input cannot already use the maximum LMUL).
323template <typename T, int kAddMin, int kSubMax, class Test>
324using ForeachPow2Trim =
325 ForeachPow2<T, MinPow2<T>() + kAddMin, MaxPow2() - kSubMax,
326 MinPow2<T>() + kAddMin <= MaxPow2() - kSubMax, Test>;
327
328#else
329// ForeachCappedR already handled all possible sizes.
330#endif // HWY_HAVE_SCALABLE
331
332} // namespace detail
333
334// These 'adapters' call a test for all possible N or kPow2 subject to
335// constraints such as "vectors must be extendable" or "vectors >= 128 bits".
336// They may be called directly, or via For*Types. Note that for an adapter C,
337// `C<Test>(T())` does not call the test - the correct invocation is
338// `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at runtime
339// that operator() is called to prevent such bugs. Note that this is not
340// thread-safe, but that is fine because C are typically local variables.
341
342// Calls Test for all powers of two in [1, Lanes(d) * (RVV? 2 : 1) ]. For
343// interleaved_test; RVV segments are limited to 8 registers, so we can only go
344// up to LMUL=2.
// NOTE(review): this listing skips lines 346, 350 and 361. Its cross-reference
// index places the class header at 346 and ~ForMaxPow2() at 350; the statement
// in the HWY_SCALAR branch (361) is not visible here.
345template <class Test>
 // mutable so that the const operator() can record that it ran.
347 mutable bool called_ = false;
348
349 public:
 // Aborts if operator() was never invoked on this adapter.
351 if (!called_) {
352 HWY_ABORT("Test is incorrect, ensure operator() is called");
353 }
354 }
355
356 template <typename T>
357 void operator()(T /*unused*/) const {
358 called_ = true;
359
360#if HWY_TARGET == HWY_SCALAR
362#else
 // Capped tags with at most Lanes(ScalableTag<T>()) lanes.
363 detail::ForeachCappedR<T, HWY_LANES(T), 1, Test>::Do(
364 1, Lanes(ScalableTag<T>()));
365
366#if HWY_TARGET == HWY_RVV
367 // To get LMUL=2 (kPow2=1), 2 is what we subtract from MaxPow2()=3.
368 detail::ForeachPow2Trim<T, 0, 2, Test>::Do(1);
369#elif HWY_HAVE_SCALABLE
370 detail::ForeachPow2Trim<T, 0, 0, Test>::Do(1);
371#endif
372#endif // HWY_TARGET == HWY_SCALAR
373 }
374};
375
376// Calls Test for all powers of two in [1, Lanes(d) >> kPow2]. This is for
377// ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
// NOTE(review): this listing skips lines 379, 383 and 402. Its cross-reference
// index places the class header at 379 and ~ForExtendableVectors() at 383; the
// statement that consumes kMul and kMinArg (402) is not visible here.
378template <class Test, int kPow2 = 1>
 // mutable so that the const operator() can record that it ran.
380 mutable bool called_ = false;
381
382 public:
 // Aborts if operator() was never invoked on this adapter.
384 if (!called_) {
385 HWY_ABORT("Test is incorrect, ensure operator() is called");
386 }
387 }
388
389 template <typename T>
390 void operator()(T /*unused*/) const {
391 called_ = true;
392 constexpr size_t kMaxCapped = HWY_LANES(T);
393 // Skip CappedTag that are already full vectors.
394 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
 // The casts below silence unused-variable warnings in the HWY_SCALAR branch.
395 (void)kMaxCapped;
396 (void)max_lanes;
397#if HWY_TARGET == HWY_SCALAR
398 // not supported
399#else
400 constexpr size_t kMul = kMaxCapped >> kPow2;
401 constexpr size_t kMinArg = size_t{1} << kPow2;
403#if HWY_HAVE_SCALABLE
404 detail::ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
405#endif
406#endif // HWY_SCALAR
407 }
408};
409
410// Calls Test for all power of two N in [1 << kPow2, Lanes(d)]. This is for ops
411// that narrow their input, e.g. UpperHalf.
412template <class Test, int kPow2 = 1>
414 mutable bool called_ = false;
415
416 public:
418 if (!called_) {
419 HWY_ABORT("Test is incorrect, ensure operator() is called");
420 }
421 }
422
423 template <typename T>
424 void operator()(T /*unused*/) const {
425 called_ = true;
426 constexpr size_t kMinLanes = size_t{1} << kPow2;
427 constexpr size_t kMaxCapped = HWY_LANES(T);
428 // For shrinking, an upper limit is unnecessary.
429 constexpr size_t max_lanes = kMaxCapped;
430
431 (void)kMinLanes;
432 (void)max_lanes;
433 (void)max_lanes;
434#if HWY_TARGET == HWY_SCALAR
435 // not supported
436#elif HWY_HAVE_SCALABLE
437 detail::ForeachPow2Trim<T, kPow2, 0, Test>::Do(kMinLanes);
438#else
439 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
440 kMinLanes, max_lanes);
441#endif // HWY_TARGET == HWY_SCALAR
442 }
443};
444
445// Calls Test for all supported power of two vectors of at least kMinBits.
446// Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
447template <size_t kMinBits, class Test>
449 mutable bool called_ = false;
450
451 public:
453 if (!called_) {
454 HWY_ABORT("Test is incorrect, ensure operator() is called");
455 }
456 }
457
458 template <typename T>
459 void operator()(T /*unused*/) const {
460 called_ = true;
461 constexpr size_t kMaxCapped = HWY_LANES(T);
462 constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
463 // An upper limit is unnecessary.
464 constexpr size_t max_lanes = kMaxCapped;
465 (void)max_lanes;
466#if HWY_TARGET == HWY_SCALAR
467 (void)kMinLanes; // not supported
468#else
469 detail::ForeachCappedR<T, HWY_LANES(T) / kMinLanes, kMinLanes, Test>::Do(
470 kMinLanes, max_lanes);
471#if HWY_HAVE_SCALABLE
472 // Can be 0 (handled below) if kMinBits > 128.
473 constexpr size_t kRatio = 128 / kMinBits;
474 constexpr int kMinPow2 =
475 kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
476 constexpr bool kValid = kMinPow2 <= detail::MaxPow2();
477 detail::ForeachPow2<T, kMinPow2, detail::MaxPow2(), kValid, Test>::Do(
478 kMinLanes);
479#endif
480#endif // HWY_TARGET == HWY_SCALAR
481 }
482};
483
484template <class Test>
486
487// Calls Test for all N that can be promoted (not the same as Extendable because
488// HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
// NOTE(review): this listing skips lines 490, 494 and 511. Its cross-reference
// index places the class header at 490 and ~ForPromoteVectors() at 494; the
// statement in the HWY_SCALAR branch (511) is not visible here.
489template <class Test, int kPow2 = 1>
 // mutable so that the const operator() can record that it ran.
491 mutable bool called_ = false;
492
493 public:
 // Aborts if operator() was never invoked on this adapter.
495 if (!called_) {
496 HWY_ABORT("Test is incorrect, ensure operator() is called");
497 }
498 }
499
500 template <typename T>
501 void operator()(T /*unused*/) const {
502 called_ = true;
 // kFactor is the size ratio implied by kPow2; the promoted lane must still
 // fit in 64 bits.
503 constexpr size_t kFactor = size_t{1} << kPow2;
504 static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
505 constexpr size_t kMaxCapped = HWY_LANES(T);
506 // Skip CappedTag that are already full vectors.
507 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
508 (void)kMaxCapped;
509 (void)max_lanes;
510#if HWY_TARGET == HWY_SCALAR
512#else
 // Compile-time check that the largest source tag has at most
 // (kMaxCapped >> kPow2) lanes.
513 using DLargestFrom = CappedTag<T, (kMaxCapped >> kPow2) * kFactor, -kPow2>;
514 static_assert(HWY_MAX_LANES_D(DLargestFrom) <= (kMaxCapped >> kPow2),
515 "HWY_MAX_LANES_D(DLargestFrom) must be less than or equal to "
516 "(kMaxCapped >> kPow2)");
517 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kFactor, Test, -kPow2>::Do(
518 1, max_lanes);
519#if HWY_HAVE_SCALABLE
520 detail::ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
521#endif
522#endif // HWY_SCALAR
523 }
524};
525
526// Calls Test for all N that can be demoted (not the same as Shrinkable because
527// HWY_SCALAR has one lane and as a one-lane vector with a lane size of at least
528// 2 bytes can always be demoted to a vector with a smaller lane type).
// NOTE(review): this listing skips lines 530, 534 and 570. Its cross-reference
// index places the class header at 530 and ~ForDemoteVectors() at 534; the
// statement that consumes kMaxCapped and max_lanes (570) is not visible here.
529template <class Test, int kPow2 = 1>
 // mutable so that the const operator() can record that it ran.
531 mutable bool called_ = false;
532
533 public:
 // Aborts if operator() was never invoked on this adapter.
535 if (!called_) {
536 HWY_ABORT("Test is incorrect, ensure operator() is called");
537 }
538 }
539
540 template <typename T>
541 void operator()(T /*unused*/) const {
542 called_ = true;
543
544#if HWY_HAVE_SCALABLE
545 // kMinTVecPow2 is the smallest Pow2 for a vector with lane type T that is
546 // supported by detail::ForeachPow2Trim
547 constexpr int kMinTVecPow2 = detail::MinPow2<T>();
548
549 // detail::MinPow2<T>() + kMinPow2Adj is the smallest Pow2 for a vector with
550 // lane type T that can be demoted to a vector with a lane size of
551 // (sizeof(T) >> kPow2)
552 constexpr int kMinPow2Adj = HWY_MAX(-3 - kMinTVecPow2 + kPow2, 0);
553
554 detail::ForeachPow2Trim<T, kMinPow2Adj, 0, Test>::Do(1);
555
556 // On targets with scalable vectors, detail::ForeachCappedR below only
557 // needs to be executed for vectors that have less than
558 // Lanes(ScalableTag<T>()) as full vectors were already checked by the
559 // detail::ForeachPow2Trim above.
560 constexpr size_t kMaxCapped = HWY_LANES(T) >> 1;
561 const size_t max_lanes = Lanes(ScalableTag<T>()) >> 1;
562#else
563 // On targets where HWY_HAVE_SCALABLE is 0, any vector with HWY_LANES(T)
564 // or fewer lanes can always be demoted to a vector with a smaller lane
565 // type.
566 constexpr size_t kMaxCapped = HWY_LANES(T);
567 const size_t max_lanes = kMaxCapped;
568#endif
569
571 }
572};
573
574// For LowerHalf/Quarter.
// NOTE(review): this listing skips lines 576, 580 and 590. Its cross-reference
// index places the class header at 576 and ~ForHalfVectors() at 580; the
// statement in the HWY_SCALAR branch (590) is not visible here.
575template <class Test, int kPow2 = 1>
 // mutable so that the const operator() can record that it ran.
577 mutable bool called_ = false;
578
579 public:
 // Aborts if operator() was never invoked on this adapter.
581 if (!called_) {
582 HWY_ABORT("Test is incorrect, ensure operator() is called");
583 }
584 }
585
586 template <typename T>
587 void operator()(T /*unused*/) const {
588 called_ = true;
589#if HWY_TARGET == HWY_SCALAR
591#else
 // Only vectors with at least (1 << kPow2) lanes are tested.
592 constexpr size_t kMinLanes = size_t{1} << kPow2;
593 // For shrinking, an upper limit is unnecessary.
594 constexpr size_t kMaxCapped = HWY_LANES(T);
595 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
596 kMinLanes, kMaxCapped);
597
598// TODO(janwas): call Extendable if kMinLanes check not required?
599#if HWY_HAVE_SCALABLE
600 detail::ForeachPow2Trim<T, kPow2, 0, Test>::Do(kMinLanes);
601#endif
602#endif // HWY_TARGET == HWY_SCALAR
603 }
604};
605
606// Calls Test for all power of two N in [1, Lanes(d)]. This is the default
607// for ops that do not narrow nor widen their input, nor require 128 bits.
// NOTE(review): this listing skips lines 609, 613, 624 and 626. Its
// cross-reference index places the class header at 609 and
// ~ForPartialVectors() at 613; the statements in both preprocessor branches
// (624 and 626) are not visible here.
608template <class Test>
 // mutable so that the const operator() can record that it ran.
610 mutable bool called_ = false;
611
612 public:
 // Aborts if operator() was never invoked on this adapter.
614 if (!called_) {
615 HWY_ABORT("Test is incorrect, ensure operator() is called");
616 }
617 }
618
619 template <typename T>
620 void operator()(T t) const {
621 called_ = true;
622#if HWY_TARGET == HWY_SCALAR
623 (void)t;
625#else
627#endif
628 }
629};
630
631// ForPartialFixedOrFullScalableVectors calls Test for each D where
632// MaxLanes(D()) == MaxLanes(DFromV<VFromD<D>>())
// NOTE(review): this listing skips lines 635, 660-661 and 664, so the class
// header and the declarations in the #elif/#else branches are not visible
// here. Its cross-reference index shows a declaration at line 660 that maps
// this name to ForGEVectors<HWY_MAX_BYTES * 8, Test>.
633#if HWY_HAVE_SCALABLE
634template <class Test>
 // mutable so that the const operator() can record that it ran.
636 mutable bool called_ = false;
637
638 public:
 // Aborts if operator() was never invoked on this adapter.
639 ~ForPartialFixedOrFullScalableVectors() {
640 if (!called_) {
641 HWY_ABORT("Test is incorrect, ensure operator() is called");
642 }
643 }
644
645 template <typename T>
646 void operator()(T /*t*/) const {
647 called_ = true;
648#if HWY_TARGET == HWY_RVV
 // On RVV the minimum kPow2 depends on sizeof(T) and the maximum is 3.
649 constexpr int kMinPow2 = -3 + static_cast<int>(CeilLog2(sizeof(T)));
650 constexpr int kMaxPow2 = 3;
651#else
 // Other scalable targets: only kPow2 == 0.
652 constexpr int kMinPow2 = 0;
653 constexpr int kMaxPow2 = 0;
654#endif
655 detail::ForeachPow2<T, kMinPow2, kMaxPow2, true, Test>::Do(1);
656 }
657};
658#elif HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
659template <class Test>
662#else
663template <class Test>
665#endif
666
667// Type lists to shorten call sites:
668
// Invokes func once per signed integer lane type, in increasing size order,
// passing a zero value of that type. int64_t is included only if
// HWY_HAVE_INTEGER64.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_HAVE_INTEGER64
  func(int64_t{});
#endif
}
678
// Invokes func once per unsigned integer lane type, in increasing size order,
// passing a zero value of that type. uint64_t is included only if
// HWY_HAVE_INTEGER64.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}
688
// Invokes func for every signed integer type, then for every unsigned one.
689template <class Func>
690void ForIntegerTypes(const Func& func) {
691 ForSignedTypes(func);
692 ForUnsignedTypes(func);
693}
694
// Invokes func with float16_t if HWY_HAVE_FLOAT16; otherwise does nothing.
template <class Func>
void ForFloat16Types(const Func& func) {
#if !HWY_HAVE_FLOAT16
  (void)func;  // no f16 type to pass
#else
  func(float16_t());
#endif
}
703
// Invokes func with a double if HWY_HAVE_FLOAT64; otherwise does nothing.
template <class Func>
void ForFloat64Types(const Func& func) {
#if !HWY_HAVE_FLOAT64
  (void)func;  // no f64 support to exercise
#else
  func(double{});
#endif
}
712
// `#if HWY_HAVE_FLOAT*` is sufficient for tests using static dispatch. In
// sort_test we also use dynamic dispatch, so there we call the For*Dynamic
// functions which also check hwy::HaveFloat*.
//
// Invokes func with float16_t only if HWY_HAVE_FLOAT16 is set at compile time
// and hwy::HaveFloat16() returns true at runtime.
template <class Func>
void ForFloat16TypesDynamic(const Func& func) {
#if HWY_HAVE_FLOAT16
  if (!hwy::HaveFloat16()) return;
  func(float16_t());
#else
  (void)func;
#endif
}
726
// Invokes func with a double only if HWY_HAVE_FLOAT64 is set at compile time
// and hwy::HaveFloat64() returns true at runtime.
template <class Func>
void ForFloat64TypesDynamic(const Func& func) {
#if HWY_HAVE_FLOAT64
  if (!hwy::HaveFloat64()) return;
  func(double{});
#else
  (void)func;
#endif
}
737
// Invokes func with a float, then delegates to ForFloat64Types.
template <class Func>
void ForFloat3264Types(const Func& func) {
  func(float{});
  ForFloat64Types(func);
}
743
// Invokes func for the float16 types, then for the 32/64-bit float types.
744template <class Func>
745void ForFloatTypes(const Func& func) {
746 ForFloat16Types(func);
747 ForFloat3264Types(func);
748}
749
// NOTE(review): this listing skips lines 752 and 754 inside this function, so
// only the call with a float is visible here.
750template <class Func>
751void ForFloatTypesDynamic(const Func& func) {
753 func(float());
755}
756
// Invokes func for all integer types, then for all float types.
757template <class Func>
758void ForAllTypes(const Func& func) {
759 ForIntegerTypes(func);
760 ForFloatTypes(func);
761}
762
763// For ops that are also unconditionally available for bfloat16_t/float16_t.
// Invokes func with float16_t, then bfloat16_t, without any HWY_HAVE_* guard.
764template <class Func>
765void ForSpecialTypes(const Func& func) {
766 func(float16_t());
767 func(bfloat16_t());
768}
// Invokes func via ForAllTypes, then via ForSpecialTypes.
769template <class Func>
770void ForAllTypesAndSpecial(const Func& func) {
771 ForAllTypes(func);
772 ForSpecialTypes(func);
773}
774
// Invokes func with the 8-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t{});
  func(int8_t{});
}
780
// Invokes func with the 16-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t{});
  func(int16_t{});
}
786
// Invokes func for the 16-bit integer types, then for the float16 types.
787template <class Func>
788void ForUIF16(const Func& func) {
789 ForUI16(func);
790 ForFloat16Types(func);
791}
792
// Invokes func with the 32-bit integer types: unsigned first, then signed.
template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t{});
  func(int32_t{});
}
798
// Invokes func for the 32-bit integer types (via ForUI32), then with a float.
template <class Func>
void ForUIF32(const Func& func) {
  ForUI32(func);
  func(float{});
}
804
// Invokes func with the 64-bit integer types (unsigned first, then signed) if
// HWY_HAVE_INTEGER64; otherwise does nothing.
template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t());
  func(int64_t());
#else
  // Matches the other For*Types helpers: avoids an unused-parameter warning
  // when 64-bit integers are unavailable.
  (void)func;
#endif
}
812
// Invokes func for the 64-bit integer types, then for the float64 types.
813template <class Func>
814void ForUIF64(const Func& func) {
815 ForUI64(func);
816 ForFloat64Types(func);
817}
818
// Invokes func for the 32-bit integer types, then for the 64-bit ones.
819template <class Func>
820void ForUI3264(const Func& func) {
821 ForUI32(func);
822 ForUI64(func);
823}
824
// Invokes func via ForUIF32, then via ForUIF64.
825template <class Func>
826void ForUIF3264(const Func& func) {
827 ForUIF32(func);
828 ForUIF64(func);
829}
830
// Invokes func with uint8_t, then uint16_t.
template <class Func>
void ForU816(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
}
836
// Invokes func with int8_t, then int16_t.
template <class Func>
void ForI816(const Func& func) {
  func(int8_t{});
  func(int16_t{});
}
842
// Invokes func with uint16_t, uint32_t and, if HWY_HAVE_INTEGER64, uint64_t.
template <class Func>
void ForU163264(const Func& func) {
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}
851
// Invokes func for the 16-bit integer types, then the 32- and 64-bit ones.
852template <class Func>
853void ForUI163264(const Func& func) {
854 ForUI16(func);
855 ForUI3264(func);
856}
857
// Invokes func via ForUIF16, then via ForUIF3264.
858template <class Func>
859void ForUIF163264(const Func& func) {
860 ForUIF16(func);
861 ForUIF3264(func);
862}
863
864// For tests that involve loops, adjust the trip count so that emulated tests
865// finish quickly (but always at least 2 iterations to ensure some diversity).
866constexpr size_t AdjustedReps(size_t max_reps) {
867#if HWY_ARCH_RISCV
868 return HWY_MAX(max_reps / 32, 2);
869#elif HWY_IS_DEBUG_BUILD
870 return HWY_MAX(max_reps / 8, 2);
871#elif HWY_ARCH_ARM
872 return HWY_MAX(max_reps / 4, 2);
873#elif HWY_COMPILER_MSVC
874 return HWY_MAX(max_reps / 2, 2);
875#else
876 return HWY_MAX(max_reps, 2);
877#endif
878}
879
// Counterpart of AdjustedReps for loops whose trip count is 1 << max_pow2:
// returns a possibly reduced exponent (minus 4 on RISC-V, minus 1 for debug
// builds and on Arm, unchanged otherwise).
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
  // The subtraction is unsigned; if it wraps around ("negative"), the
  // difference is huge and HWY_MIN selects the original value instead.
#if HWY_ARCH_RISCV
  return HWY_MIN(max_pow2, max_pow2 - 4);
#elif HWY_IS_DEBUG_BUILD
  return HWY_MIN(max_pow2, max_pow2 - 1);
#elif HWY_ARCH_ARM
  return HWY_MIN(max_pow2, max_pow2 - 1);
#else
  return max_pow2;
#endif
}
893
894// NOLINTNEXTLINE(google-readability-namespace-comments)
895} // namespace HWY_NAMESPACE
896} // namespace hwy
898
899#endif // per-target include guard
#define HWY_MAX(a, b)
Definition base.h:177
#define HWY_NOINLINE
Definition base.h:103
#define HWY_MIN(a, b)
Definition base.h:176
#define HWY_ABORT(format,...)
Definition base.h:233
#define HWY_INLINE
Definition base.h:101
#define HWY_ASSERT(condition)
Definition base.h:237
Definition test_util-inl.h:530
~ForDemoteVectors()
Definition test_util-inl.h:534
void operator()(T) const
Definition test_util-inl.h:541
bool called_
Definition test_util-inl.h:531
Definition test_util-inl.h:379
void operator()(T) const
Definition test_util-inl.h:390
bool called_
Definition test_util-inl.h:380
~ForExtendableVectors()
Definition test_util-inl.h:383
Definition test_util-inl.h:448
bool called_
Definition test_util-inl.h:449
~ForGEVectors()
Definition test_util-inl.h:452
void operator()(T) const
Definition test_util-inl.h:459
Definition test_util-inl.h:576
~ForHalfVectors()
Definition test_util-inl.h:580
bool called_
Definition test_util-inl.h:577
void operator()(T) const
Definition test_util-inl.h:587
Definition test_util-inl.h:346
~ForMaxPow2()
Definition test_util-inl.h:350
bool called_
Definition test_util-inl.h:347
void operator()(T) const
Definition test_util-inl.h:357
Definition test_util-inl.h:609
bool called_
Definition test_util-inl.h:610
void operator()(T t) const
Definition test_util-inl.h:620
~ForPartialVectors()
Definition test_util-inl.h:613
Definition test_util-inl.h:490
~ForPromoteVectors()
Definition test_util-inl.h:494
bool called_
Definition test_util-inl.h:491
void operator()(T) const
Definition test_util-inl.h:501
Definition test_util-inl.h:413
void operator()(T) const
Definition test_util-inl.h:424
bool called_
Definition test_util-inl.h:414
~ForShrinkableVectors()
Definition test_util-inl.h:417
#define HWY_TARGET
Definition detect_targets.h:543
typename D::template Rebind< T > Rebind
Definition ops/shared-inl.h:460
HWY_API VFromD< D > VecFromMask(D d, const MFromD< D > m)
Definition arm_neon-inl.h:2960
constexpr size_t AdjustedReps(size_t max_reps)
Definition test_util-inl.h:866
void ForUIF32(const Func &func)
Definition test_util-inl.h:800
D d
Definition arm_sve-inl.h:1915
void ForI816(const Func &func)
Definition test_util-inl.h:838
HWY_API size_t CountTrue(D, Mask128< T > mask)
Definition arm_neon-inl.h:8358
void ForUI163264(const Func &func)
Definition test_util-inl.h:853
void ForU816(const Func &func)
Definition test_util-inl.h:832
V VecArg
Definition ops/shared-inl.h:69
HWY_INLINE Vec< D > PositiveIota(D d)
Definition test_util-inl.h:50
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2690
void ForUIF3264(const Func &func)
Definition test_util-inl.h:826
void ForUIF163264(const Func &func)
Definition test_util-inl.h:859
HWY_INLINE void AssertVecEqual(D d, const T *expected, Vec< D > actual, const char *filename, const int line)
Definition test_util-inl.h:120
constexpr size_t AdjustedLog2Reps(size_t max_pow2)
Definition test_util-inl.h:881
HWY_API void Store(VFromD< D > v, D d, TFromD< D > *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:3911
void ForU163264(const Func &func)
Definition test_util-inl.h:844
HWY_API Vec128< uint64_t, N > Max(Vec128< uint64_t, N > a, Vec128< uint64_t, N > b)
Definition arm_neon-inl.h:3377
void ForFloatTypesDynamic(const Func &func)
Definition test_util-inl.h:751
void ForUI32(const Func &func)
Definition test_util-inl.h:794
void ForAllTypes(const Func &func)
Definition test_util-inl.h:758
VFromD< D > IotaForSpecial(D d, First first)
Definition test_util-inl.h:63
void ForFloat16TypesDynamic(const Func &func)
Definition test_util-inl.h:717
HWY_API VFromD< D > OrderedDemote2To(D d, V a, V b)
Definition arm_neon-inl.h:7394
void ForFloatTypes(const Func &func)
Definition test_util-inl.h:745
ForGEVectors< HWY_MAX_BYTES *8, Test > ForPartialFixedOrFullScalableVectors
Definition test_util-inl.h:660
HWY_API size_t StoreMaskBits(D d, MFromD< D > mask, uint8_t *bits)
Definition arm_neon-inl.h:8402
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D > > a, VecArg< Mask< D > > b, const char *filename, int line)
Definition test_util-inl.h:151
void ForSpecialTypes(const Func &func)
Definition test_util-inl.h:765
HWY_API VFromD< D > Load(D d, const TFromD< D > *HWY_RESTRICT p)
Definition arm_neon-inl.h:3664
void ForIntegerTypes(const Func &func)
Definition test_util-inl.h:690
HWY_API Vec128< uint8_t > Combine(D, Vec64< uint8_t > hi, Vec64< uint8_t > lo)
Definition arm_neon-inl.h:1314
void ForUI8(const Func &func)
Definition test_util-inl.h:776
HWY_API Vec64< uint16_t > DemoteTo(D, Vec128< int32_t > v)
Definition arm_neon-inl.h:4629
typename detail::CappedTagChecker< T, kLimit, kPow2 >::type CappedTag
Definition ops/shared-inl.h:379
void ForUI3264(const Func &func)
Definition test_util-inl.h:820
HWY_API bool AllTrue(D d, Mask128< T > m)
Definition arm_neon-inl.h:8416
HWY_API VFromD< D > Iota(D d, const T2 first)
Definition arm_neon-inl.h:1297
void ForUIF64(const Func &func)
Definition test_util-inl.h:814
void ForUI16(const Func &func)
Definition test_util-inl.h:782
void ForFloat64TypesDynamic(const Func &func)
Definition test_util-inl.h:728
decltype(MaskFromVec(Zero(D()))) Mask
Definition generic_ops-inl.h:52
void ForFloat3264Types(const Func &func)
Definition test_util-inl.h:739
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition ops/shared-inl.h:367
void ForFloat16Types(const Func &func)
Definition test_util-inl.h:696
void ForUI64(const Func &func)
Definition test_util-inl.h:806
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
void ForSignedTypes(const Func &func)
Definition test_util-inl.h:670
typename D::Half Half
Definition ops/shared-inl.h:487
HWY_API bool AllFalse(D d, MFromD< D > m)
Definition arm_neon-inl.h:8410
void ForUIF16(const Func &func)
Definition test_util-inl.h:788
decltype(Set(D(), TFromD< D >())) VFromD
Definition arm_neon-inl.h:944
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
HWY_API MFromD< D > FirstN(D d, size_t num)
Definition arm_neon-inl.h:3232
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition test_util-inl.h:209
void ForFloat64Types(const Func &func)
Definition test_util-inl.h:705
void ForAllTypesAndSpecial(const Func &func)
Definition test_util-inl.h:770
typename D::template Repartition< T > Repartition
Definition ops/shared-inl.h:471
HWY_API void Print(const D d, const char *caption, V v, size_t lane_u=0, size_t max_lanes=7)
Definition print-inl.h:39
void ForUnsignedTypes(const Func &func)
Definition test_util-inl.h:680
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
Definition abort.h:8
FuncOutput(*)(const void *, FuncInput) Func
Definition nanobenchmark.h:87
HWY_INLINE void AssertEqual(const TExpected texpected, const TActual actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition test_util.h:194
static HWY_MAYBE_UNUSED const char * TargetName(int64_t target)
Definition targets.h:85
HWY_DLLEXPORT bool HaveFloat64()
constexpr size_t CeilLog2(TI x)
Definition base.h:2669
HWY_DLLEXPORT HWY_NORETURN void int line
Definition base.h:231
HWY_API constexpr T LimitsMax()
Definition base.h:2174
HWY_DLLEXPORT bool HaveFloat16()
#define HWY_IF_F16_D(D)
Definition ops/shared-inl.h:597
#define HWY_IF_BF16_D(D)
Definition ops/shared-inl.h:594
#define HWY_IF_POW2_GT_D(D, pow2)
Definition ops/shared-inl.h:574
#define HWY_IF_POW2_LE_D(D, pow2)
Definition ops/shared-inl.h:573
#define HWY_IF_LANES_GT_D(D, lanes)
Definition ops/shared-inl.h:562
#define HWY_IF_LANES_D(D, lanes)
Definition ops/shared-inl.h:560
#define HWY_IF_SPECIAL_FLOAT_D(D)
Definition ops/shared-inl.h:540
#define HWY_MAX_LANES_D(D)
Definition ops/shared-inl.h:432
#define HWY_LANES(T)
Definition set_macros-inl.h:169
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
static void Do(size_t, size_t)
Definition test_util-inl.h:265
Definition test_util-inl.h:247
static void Do(size_t min_lanes, size_t max_lanes)
Definition test_util-inl.h:248
Definition base.h:1594
Definition base.h:1117
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
int VFromD
Definition tuple-inl.h:25