Grok 12.0.1
result-inl.h
Go to the documentation of this file.
1// Copyright 2021 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
17
18// Normal include guard for non-SIMD parts
19#ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
20#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
21
22#include <time.h>
23
24#include <algorithm> // std::sort
25#include <string>
26
27#include "hwy/base.h"
28#include "hwy/nanobenchmark.h"
29#include "hwy/timer.h"
30
31namespace hwy {
32
// Returns the mean of the second quartile of `seconds`, i.e. of the sorted
// samples with indices [num / 4, num / 2). The slowest half and the fastest
// quarter are discarded (we don't want to run an out-of-L3-cache sort often
// enough for the mode to be reliable).
//
// Sorts `seconds` in place (ascending) as a side effect.
//
// With a single sample the quartile would be empty, so that sample is returned
// as-is instead of dividing by zero. Returns 0.0 if `seconds` is empty.
static inline double SummarizeMeasurements(std::vector<double>& seconds) {
  const size_t num = seconds.size();
  if (num == 0) return 0.0;  // Nothing to average; avoid 0.0 / 0 (NaN).

  std::sort(seconds.begin(), seconds.end());

  const size_t begin = num / 4;
  // num / 2 exceeds num / 4 for every num >= 2. Only num == 1 yields an empty
  // window, which is widened to that one sample.
  const size_t end = (num / 2 > begin) ? num / 2 : begin + 1;

  double sum = 0.0;
  for (size_t i = begin; i < end; ++i) {
    sum += seconds[i];
  }
  return sum / static_cast<double>(end - begin);
}
46
47} // namespace hwy
48#endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
49
50// Per-target
51#if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \
52 defined(HWY_TARGET_TOGGLE)
53#ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
54#undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
55#else
56#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
57#endif
58
60namespace hwy {
61namespace HWY_NAMESPACE {
62
63struct Result {
64 Result() {}
65 Result(const Algo algo, Dist dist, size_t num_keys, size_t num_threads,
66 double sec, size_t sizeof_key, const char* key_name)
68 algo(algo),
69 dist(dist),
72 sec(sec),
75
76 void Print() const {
77 const double bytes = static_cast<double>(num_keys) *
78 static_cast<double>(num_threads) *
79 static_cast<double>(sizeof_key);
80 printf("%10s: %12s: %7s: %9s: %05g %4.0f MB/s (%2zu threads)\n",
82 DistName(dist), static_cast<double>(num_keys), bytes * 1E-6 / sec,
84 }
85
86 int64_t target;
89 size_t num_keys = 0;
90 size_t num_threads = 0;
91 double sec = 0.0;
92 size_t sizeof_key = 0;
93 std::string key_name;
94};
95
96template <class Traits, typename LaneType>
97bool VerifyPartialSort(Traits st, const InputStats<LaneType>& input_stats,
98 const LaneType* out, const size_t num_lanes,
99 const size_t k, const char* caller) {
100 constexpr size_t N1 = st.LanesPerKey();
101 HWY_ASSERT(num_lanes >= N1);
102 HWY_ASSERT(k >= N1 && k < num_lanes);
103
104 InputStats<LaneType> output_stats;
105 // Ensure it matches the sort order
106 for (size_t i = 0; i < num_lanes - N1; i += N1) {
107 output_stats.Notify(out[i]);
108 if (N1 == 2) output_stats.Notify(out[i + 1]);
109
110 // Reverse order instead of checking !Compare1 so we accept equal keys.
111 if (i < k - N1 && st.Compare1(out + i + N1, out + i)) {
112 fprintf(stderr, "%s: i=%d of %d lanes: N1=%d", caller,
113 static_cast<int>(i), static_cast<int>(num_lanes),
114 static_cast<int>(N1));
115 // TODO %5.0f prints unhelpful integers for the float/double tests.
116 fprintf(stderr, "%5.0f %5.0f vs. %5.0f %5.0f\n\n",
117 static_cast<double>(out[i + 1]), static_cast<double>(out[i + 0]),
118 static_cast<double>(out[i + N1 + 1]),
119 static_cast<double>(out[i + N1]));
120 HWY_ABORT("%d-bit sort is incorrect\n",
121 static_cast<int>(sizeof(LaneType) * 8 * N1));
122 }
123 }
124 output_stats.Notify(out[num_lanes - N1]);
125 if (N1 == 2) output_stats.Notify(out[num_lanes - N1 + 1]);
126
127 return input_stats == output_stats;
128}
129
130template <class Traits, typename LaneType>
131bool VerifySort(Traits st, const InputStats<LaneType>& input_stats,
132 const LaneType* out, size_t num_lanes, const char* caller) {
133 constexpr size_t N1 = st.LanesPerKey();
134 HWY_ASSERT(num_lanes >= N1);
135
136 InputStats<LaneType> output_stats;
137 // Ensure it matches the sort order
138 for (size_t i = 0; i < num_lanes - N1; i += N1) {
139 output_stats.Notify(out[i]);
140 if (N1 == 2) output_stats.Notify(out[i + 1]);
141 // Reverse order instead of checking !Compare1 so we accept equal keys.
142 if (st.Compare1(out + i + N1, out + i)) {
143 fprintf(stderr, "%s: i=%d of %d lanes: N1=%d", caller,
144 static_cast<int>(i), static_cast<int>(num_lanes),
145 static_cast<int>(N1));
146 // TODO %5.0f prints unhelpful integers for the float/double tests.
147 fprintf(stderr, "%5.0f %5.0f vs. %5.0f %5.0f\n\n",
148 static_cast<double>(out[i + 1]), static_cast<double>(out[i + 0]),
149 static_cast<double>(out[i + N1 + 1]),
150 static_cast<double>(out[i + N1]));
151 HWY_ABORT("%d-bit sort is incorrect\n",
152 static_cast<int>(sizeof(LaneType) * 8 * N1));
153 }
154 }
155 output_stats.Notify(out[num_lanes - N1]);
156 if (N1 == 2) output_stats.Notify(out[num_lanes - N1 + 1]);
157
158 return input_stats == output_stats;
159}
160
161template <class Traits, typename LaneType>
162bool VerifySelect(Traits st, const InputStats<LaneType>& input_stats,
163 const LaneType* out, const size_t num_lanes, const size_t k,
164 const char* caller) {
165 constexpr size_t N1 = st.LanesPerKey();
166 HWY_ASSERT(num_lanes >= N1);
167
168 InputStats<LaneType> output_stats;
169 // Ensure all of the elements below the k_th element are <= the k_th element,
170 // and all of the elements above the k_th element are >= the k_th element.
171 for (size_t i = 0; i < num_lanes - N1; i += N1) {
172 output_stats.Notify(out[i]);
173 if (N1 == 2) output_stats.Notify(out[i + 1]);
174 // Reverse order instead of checking !Compare1 so we accept equal keys.
175 if (i < k ? st.Compare1(out + k, out + i) : st.Compare1(out + i, out + k)) {
176 fprintf(stderr, "%s: i=%d of %d lanes: N1=%d k=%d\t", caller,
177 static_cast<int>(i), static_cast<int>(num_lanes),
178 static_cast<int>(N1), static_cast<int>(k));
179 fprintf(stderr, "%5.0f %5.0f vs. %5.0f %5.0f\n\n",
180 static_cast<double>(out[i]), static_cast<double>(out[i + 1]),
181 static_cast<double>(out[k]), static_cast<double>(out[k + 1]));
182 HWY_ABORT("%d-bit select is incorrect\n",
183 static_cast<int>(sizeof(LaneType) * 8 * N1));
184 }
185 }
186 output_stats.Notify(out[num_lanes - N1]);
187 if (N1 == 2) output_stats.Notify(out[num_lanes - N1 + 1]);
188
189 return input_stats == output_stats;
190}
191
192// NOLINTNEXTLINE(google-readability-namespace-comments)
193} // namespace HWY_NAMESPACE
194} // namespace hwy
196
197#endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
#define HWY_ABORT(format,...)
Definition base.h:233
#define HWY_ASSERT(condition)
Definition base.h:237
Definition algo-inl.h:126
void Notify(T value)
Definition algo-inl.h:128
#define HWY_TARGET
Definition detect_targets.h:543
bool VerifySort(Traits st, const InputStats< LaneType > &input_stats, const LaneType *out, size_t num_lanes, const char *caller)
Definition result-inl.h:131
bool VerifySelect(Traits st, const InputStats< LaneType > &input_stats, const LaneType *out, const size_t num_lanes, const size_t k, const char *caller)
Definition result-inl.h:162
bool VerifyPartialSort(Traits st, const InputStats< LaneType > &input_stats, const LaneType *out, const size_t num_lanes, const size_t k, const char *caller)
Definition result-inl.h:97
decltype(GetLane(V())) LaneType
Definition generic_ops-inl.h:39
Definition abort.h:8
static const char * DistName(Dist dist)
Definition algo-inl.h:113
Dist
Definition algo-inl.h:107
static const char * AlgoName(Algo algo)
Definition algo-inl.h:207
static HWY_MAYBE_UNUSED const char * TargetName(int64_t target)
Definition targets.h:85
static double SummarizeMeasurements(std::vector< double > &seconds)
Definition result-inl.h:35
Algo
Definition algo-inl.h:174
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
Definition result-inl.h:63
double sec
Definition result-inl.h:91
Result(const Algo algo, Dist dist, size_t num_keys, size_t num_threads, double sec, size_t sizeof_key, const char *key_name)
Definition result-inl.h:65
Algo algo
Definition result-inl.h:87
size_t sizeof_key
Definition result-inl.h:92
Dist dist
Definition result-inl.h:88
void Print() const
Definition result-inl.h:76
int64_t target
Definition result-inl.h:86
std::string key_name
Definition result-inl.h:93
size_t num_threads
Definition result-inl.h:90
Result()
Definition result-inl.h:64
size_t num_keys
Definition result-inl.h:89