Grok 12.0.1
vqsort.h
Go to the documentation of this file.
1// Copyright 2022 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Interface to vectorized quicksort with dynamic dispatch. For static dispatch
17// without any DLLEXPORT, avoid including this header and instead define
18// VQSORT_ONLY_STATIC, then call VQSortStatic* in vqsort-inl.h.
19//
20// Blog post: https://tinyurl.com/vqsort-blog
21// Paper with measurements: https://arxiv.org/abs/2205.05982
22//
23// To ensure the overhead of using wide vectors (e.g. AVX2 or AVX-512) is
24// worthwhile, we recommend using this code for sorting arrays whose size is at
25// least 100 KiB. See the README for details.
26
27#ifndef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
28#define HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
29
30// IWYU pragma: begin_exports
31#include "hwy/base.h"
32#include "hwy/contrib/sort/order.h" // SortAscending
33// IWYU pragma: end_exports
34
35namespace hwy {
36
37// Vectorized Quicksort: sorts keys[0, n). Does not preserve the ordering of
38// equivalent keys (defined as: neither greater nor less than another).
39// Dispatches to the best available instruction set. Does not allocate memory.
40// Uses about 1.2 KiB stack plus an internal 3-word TLS cache for random state.
41HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, const size_t n,
43HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, const size_t n,
45HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, const size_t n,
47HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, const size_t n,
49HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, const size_t n,
51HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, const size_t n,
53HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, const size_t n,
55HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, const size_t n,
57HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, const size_t n,
59HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, const size_t n,
61HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, const size_t n,
63HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, const size_t n,
65
66// These two must only be called if hwy::HaveFloat16() is true.
71
72HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, const size_t n,
74HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, const size_t n,
76
77// These two must only be called if hwy::HaveFloat64() is true.
78HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, const size_t n,
80HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, const size_t n,
82
95
96// Vectorized QuickPartialsort:
97// Rearranges elements such that the range [0, k) contains the sorted k − first
98// smallest elements in the range [0, n). Does not preserve the ordering of
99// equivalent keys (defined as: neither greater nor less than another).
100// Dispatches to the best available instruction set. Does not allocate memory.
101// Uses about 1.2 KiB stack plus an internal 3-word TLS cache for random state.
103 const size_t n, const size_t k,
106 const size_t n, const size_t k,
109 const size_t n, const size_t k,
112 const size_t n, const size_t k,
115 const size_t n, const size_t k,
118 const size_t n, const size_t k,
121 const size_t n, const size_t k,
124 const size_t n, const size_t k,
127 const size_t n, const size_t k,
130 const size_t n, const size_t k,
133 const size_t n, const size_t k,
136 const size_t n, const size_t k,
138
139// These two must only be called if hwy::HaveFloat16() is true.
141 const size_t n, const size_t k,
144 const size_t n, const size_t k,
146
148 const size_t n, const size_t k,
151 const size_t n, const size_t k,
153
154// These two must only be called if hwy::HaveFloat64() is true.
156 const size_t n, const size_t k,
159 const size_t n, const size_t k,
161
163 const size_t n, const size_t k,
166 const size_t n, const size_t k,
169 const size_t n, const size_t k,
172 const size_t n, const size_t k,
175 const size_t n, const size_t k,
178 const size_t n, const size_t k,
180
181// Vectorized Quickselect
182// rearranges elements in [0, n) such that:
183// The element pointed at by kth is changed to whatever element would occur in
184// that position if [0, n) were sorted. All of the elements before this new kth
185// element are less than or equal to the elements after the new kth element.
186HWY_CONTRIB_DLLEXPORT void VQSelect(uint16_t* HWY_RESTRICT keys, const size_t n,
187 const size_t k, SortAscending);
188HWY_CONTRIB_DLLEXPORT void VQSelect(uint16_t* HWY_RESTRICT keys, const size_t n,
189 const size_t k, SortDescending);
190HWY_CONTRIB_DLLEXPORT void VQSelect(uint32_t* HWY_RESTRICT keys, const size_t n,
191 const size_t k, SortAscending);
192HWY_CONTRIB_DLLEXPORT void VQSelect(uint32_t* HWY_RESTRICT keys, const size_t n,
193 const size_t k, SortDescending);
194HWY_CONTRIB_DLLEXPORT void VQSelect(uint64_t* HWY_RESTRICT keys, const size_t n,
195 const size_t k, SortAscending);
196HWY_CONTRIB_DLLEXPORT void VQSelect(uint64_t* HWY_RESTRICT keys, const size_t n,
197 const size_t k, SortDescending);
198HWY_CONTRIB_DLLEXPORT void VQSelect(int16_t* HWY_RESTRICT keys, const size_t n,
199 const size_t k, SortAscending);
200HWY_CONTRIB_DLLEXPORT void VQSelect(int16_t* HWY_RESTRICT keys, const size_t n,
201 const size_t k, SortDescending);
202HWY_CONTRIB_DLLEXPORT void VQSelect(int32_t* HWY_RESTRICT keys, const size_t n,
203 const size_t k, SortAscending);
204HWY_CONTRIB_DLLEXPORT void VQSelect(int32_t* HWY_RESTRICT keys, const size_t n,
205 const size_t k, SortDescending);
206HWY_CONTRIB_DLLEXPORT void VQSelect(int64_t* HWY_RESTRICT keys, const size_t n,
207 const size_t k, SortAscending);
208HWY_CONTRIB_DLLEXPORT void VQSelect(int64_t* HWY_RESTRICT keys, const size_t n,
209 const size_t k, SortDescending);
210
211// These two must only be called if hwy::HaveFloat16() is true.
213 const size_t n, const size_t k,
216 const size_t n, const size_t k,
218
219HWY_CONTRIB_DLLEXPORT void VQSelect(float* HWY_RESTRICT keys, const size_t n,
220 const size_t k, SortAscending);
221HWY_CONTRIB_DLLEXPORT void VQSelect(float* HWY_RESTRICT keys, const size_t n,
222 const size_t k, SortDescending);
223
224// These two must only be called if hwy::HaveFloat64() is true.
225HWY_CONTRIB_DLLEXPORT void VQSelect(double* HWY_RESTRICT keys, const size_t n,
226 const size_t k, SortAscending);
227HWY_CONTRIB_DLLEXPORT void VQSelect(double* HWY_RESTRICT keys, const size_t n,
228 const size_t k, SortDescending);
229
231 const size_t n, const size_t k,
234 const size_t n, const size_t k,
237 const size_t k, SortAscending);
239 const size_t k, SortDescending);
241 const size_t k, SortAscending);
243 const size_t k, SortDescending);
244
245// User-level caching is no longer required, so this class is no longer
246// beneficial. We recommend using the simpler VQSort() interface instead, and
247// retain this class only for compatibility. It now just calls VQSort.
249 public:
251 ~Sorter() { Delete(); }
252
253 // Move-only
254 Sorter(const Sorter&) = delete;
255 Sorter& operator=(const Sorter&) = delete;
256 Sorter(Sorter&& /*other*/) {}
257 Sorter& operator=(Sorter&& /*other*/) { return *this; }
258
259 void operator()(uint16_t* HWY_RESTRICT keys, const size_t n,
260 SortAscending) const;
261 void operator()(uint16_t* HWY_RESTRICT keys, const size_t n,
262 SortDescending) const;
263 void operator()(uint32_t* HWY_RESTRICT keys, const size_t n,
264 SortAscending) const;
265 void operator()(uint32_t* HWY_RESTRICT keys, const size_t n,
266 SortDescending) const;
267 void operator()(uint64_t* HWY_RESTRICT keys, const size_t n,
268 SortAscending) const;
269 void operator()(uint64_t* HWY_RESTRICT keys, const size_t n,
270 SortDescending) const;
271
272 void operator()(int16_t* HWY_RESTRICT keys, const size_t n,
273 SortAscending) const;
274 void operator()(int16_t* HWY_RESTRICT keys, const size_t n,
275 SortDescending) const;
276 void operator()(int32_t* HWY_RESTRICT keys, const size_t n,
277 SortAscending) const;
278 void operator()(int32_t* HWY_RESTRICT keys, const size_t n,
279 SortDescending) const;
280 void operator()(int64_t* HWY_RESTRICT keys, const size_t n,
281 SortAscending) const;
282 void operator()(int64_t* HWY_RESTRICT keys, const size_t n,
283 SortDescending) const;
284
285 // These two must only be called if hwy::HaveFloat16() is true.
286 void operator()(float16_t* HWY_RESTRICT keys, const size_t n,
287 SortAscending) const;
288 void operator()(float16_t* HWY_RESTRICT keys, const size_t n,
289 SortDescending) const;
290
291 void operator()(float* HWY_RESTRICT keys, const size_t n,
292 SortAscending) const;
293 void operator()(float* HWY_RESTRICT keys, const size_t n,
294 SortDescending) const;
295
296 // These two must only be called if hwy::HaveFloat64() is true.
297 void operator()(double* HWY_RESTRICT keys, const size_t n,
298 SortAscending) const;
299 void operator()(double* HWY_RESTRICT keys, const size_t n,
300 SortDescending) const;
301
302 void operator()(uint128_t* HWY_RESTRICT keys, const size_t n,
303 SortAscending) const;
304 void operator()(uint128_t* HWY_RESTRICT keys, const size_t n,
305 SortDescending) const;
306
307 void operator()(K64V64* HWY_RESTRICT keys, const size_t n,
308 SortAscending) const;
309 void operator()(K64V64* HWY_RESTRICT keys, const size_t n,
310 SortDescending) const;
311
312 void operator()(K32V32* HWY_RESTRICT keys, const size_t n,
313 SortAscending) const;
314 void operator()(K32V32* HWY_RESTRICT keys, const size_t n,
315 SortDescending) const;
316
317 // Unused
318 static void Fill24Bytes(const void*, size_t, void*);
319 static bool HaveFloat64(); // Can also use hwy::HaveFloat64 directly.
320
321 private:
322 void Delete();
323
324 template <typename T>
325 T* Get() const {
326 return unused_;
327 }
328
329#if HWY_COMPILER_CLANG
330 HWY_DIAGNOSTICS(push)
331 HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wunused-private-field")
332#endif
333 void* unused_ = nullptr;
334#if HWY_COMPILER_CLANG
335 HWY_DIAGNOSTICS(pop)
336#endif
337};
338
339// Used by vqsort-inl unless VQSORT_ONLY_STATIC.
341
342// Unused, only provided for binary compatibility.
344
345} // namespace hwy
346
347#endif // HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
#define HWY_RESTRICT
Definition base.h:95
#define HWY_DIAGNOSTICS(tokens)
Definition base.h:109
#define HWY_DIAGNOSTICS_OFF(msc, gcc)
Definition base.h:110
Definition vqsort.h:248
void operator()(int16_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
Sorter(const Sorter &)=delete
void operator()(uint16_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(float *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(int32_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
static bool HaveFloat64()
void operator()(float *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(int64_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(double *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(float16_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(K32V32 *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(double *HWY_RESTRICT keys, const size_t n, SortAscending) const
T * Get() const
Definition vqsort.h:325
void operator()(K64V64 *HWY_RESTRICT keys, const size_t n, SortAscending) const
static void Fill24Bytes(const void *, size_t, void *)
void operator()(K64V64 *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(K32V32 *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(uint128_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(int64_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
Sorter & operator=(Sorter &&)
Definition vqsort.h:257
void operator()(float16_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void Delete()
void operator()(uint16_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(uint128_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(uint32_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(int32_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(int16_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
void operator()(uint64_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
void operator()(uint32_t *HWY_RESTRICT keys, const size_t n, SortDescending) const
Sorter(Sorter &&)
Definition vqsort.h:256
Sorter & operator=(const Sorter &)=delete
~Sorter()
Definition vqsort.h:251
void operator()(uint64_t *HWY_RESTRICT keys, const size_t n, SortAscending) const
#define HWY_CONTRIB_DLLEXPORT
Definition highway_export.h:14
Definition abort.h:8
HWY_CONTRIB_DLLEXPORT uint64_t * GetGeneratorState()
HWY_CONTRIB_DLLEXPORT void VQPartialSort(uint16_t *HWY_RESTRICT keys, const size_t n, const size_t k, SortAscending)
HWY_CONTRIB_DLLEXPORT bool Fill16BytesSecure(void *bytes)
HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t *HWY_RESTRICT keys, const size_t n, SortAscending)
HWY_CONTRIB_DLLEXPORT void VQSelect(uint16_t *HWY_RESTRICT keys, const size_t n, const size_t k, SortAscending)
Definition base.h:426
Definition base.h:419
Definition order.h:25
Definition order.h:28
Definition base.h:1117
Definition base.h:412