Grok 12.0.1
aligned_allocator.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
17#define HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
18
19// Memory allocator with support for alignment and offsets.
20
21#include <algorithm>
22#include <array>
23#include <cassert>
24#include <cstdint>
25#include <cstring>
26#include <initializer_list>
27#include <memory>
28#include <type_traits>
29#include <utility>
30#include <vector>
31
32#include "hwy/base.h"
33#include "hwy/per_target.h"
34
35namespace hwy {
36
37// Minimum alignment of allocated memory for use in HWY_ASSUME_ALIGNED, which
38// requires a literal. To prevent false sharing, this should be at least the
39// L1 cache line size, usually 64 bytes. However, Intel's L2 prefetchers may
40// access pairs of lines, and POWER8 also has 128.
41#define HWY_ALIGNMENT 128
42
43template <typename T>
44HWY_API constexpr bool IsAligned(T* ptr, size_t align = HWY_ALIGNMENT) {
45 return reinterpret_cast<uintptr_t>(ptr) % align == 0;
46}
47
48// Pointers to functions equivalent to malloc/free with an opaque void* passed
49// to them.
50using AllocPtr = void* (*)(void* opaque, size_t bytes);
51using FreePtr = void (*)(void* opaque, void* memory);
52
53// Returns null or a pointer to at least `payload_size` (which can be zero)
54// bytes of newly allocated memory, aligned to the larger of HWY_ALIGNMENT and
55// the vector size. Calls `alloc` with the passed `opaque` pointer to obtain
56// memory or malloc() if it is null.
57HWY_DLLEXPORT void* AllocateAlignedBytes(size_t payload_size,
58 AllocPtr alloc_ptr = nullptr,
59 void* opaque_ptr = nullptr);
60
61// Frees all memory. No effect if `aligned_pointer` == nullptr, otherwise it
62// must have been returned from a previous call to `AllocateAlignedBytes`.
63// Calls `free_ptr` with the passed `opaque_ptr` pointer to free the memory; if
64// `free_ptr` function is null, uses the default free().
65HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
66 FreePtr free_ptr, void* opaque_ptr);
67
68// Class that deletes the aligned pointer passed to operator() calling the
69// destructor before freeing the pointer. This is equivalent to the
70// std::default_delete but for aligned objects. For a similar deleter equivalent
71// to free() for aligned memory see AlignedFreer().
73 public:
74 AlignedDeleter() : free_(nullptr), opaque_ptr_(nullptr) {}
75 AlignedDeleter(FreePtr free_ptr, void* opaque_ptr)
76 : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
77
78 template <typename T>
79 void operator()(T* aligned_pointer) const {
80 return DeleteAlignedArray(aligned_pointer, free_, opaque_ptr_,
81 TypedArrayDeleter<T>);
82 }
83
84 private:
85 template <typename T>
86 static void TypedArrayDeleter(void* ptr, size_t size_in_bytes) {
87 size_t elems = size_in_bytes / sizeof(T);
88 for (size_t i = 0; i < elems; i++) {
89 // Explicitly call the destructor on each element.
90 (static_cast<T*>(ptr) + i)->~T();
91 }
92 }
93
94 // Function prototype that calls the destructor for each element in a typed
95 // array. TypeArrayDeleter<T> would match this prototype.
96 using ArrayDeleter = void (*)(void* t_ptr, size_t t_size);
97
98 HWY_DLLEXPORT static void DeleteAlignedArray(void* aligned_pointer,
99 FreePtr free_ptr,
100 void* opaque_ptr,
101 ArrayDeleter deleter);
102
105};
106
107// Unique pointer to T with custom aligned deleter. This can be a single
108// element U or an array of element if T is a U[]. The custom aligned deleter
109// will call the destructor on U or each element of a U[] in the array case.
110template <typename T>
111using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
112
113// Aligned memory equivalent of make_unique<T> using the custom allocators
114// alloc/free with the passed `opaque` pointer. This function calls the
115// constructor with the passed Args... and calls the destructor of the object
116// when the AlignedUniquePtr is destroyed.
117template <typename T, typename... Args>
119 void* opaque, Args&&... args) {
120 T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T), alloc, opaque));
121 return AlignedUniquePtr<T>(new (ptr) T(std::forward<Args>(args)...),
122 AlignedDeleter(free, opaque));
123}
124
125// Similar to MakeUniqueAlignedWithAlloc but using the default alloc/free
126// functions.
127template <typename T, typename... Args>
129 T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T)));
130 return AlignedUniquePtr<T>(new (ptr) T(std::forward<Args>(args)...),
132}
133
134template <class T>
136 using value_type = T;
137
138 AlignedAllocator() = default;
139
140 template <class V>
141 explicit AlignedAllocator(const AlignedAllocator<V>&) noexcept {}
142
143 template <class V>
145 static_assert(std::is_integral<V>::value,
146 "AlignedAllocator only supports integer types");
147 static_assert(sizeof(V) <= sizeof(std::size_t),
148 "V n must be smaller or equal size_t to avoid overflow");
149 return static_cast<value_type*>(
150 AllocateAlignedBytes(static_cast<std::size_t>(n) * sizeof(value_type)));
151 }
152
153 template <class V>
155 return FreeAlignedBytes(p, nullptr, nullptr);
156 }
157};
158
159template <class T, class V>
160constexpr bool operator==(const AlignedAllocator<T>&,
161 const AlignedAllocator<V>&) noexcept {
162 return true;
163}
164
165template <class T, class V>
166constexpr bool operator!=(const AlignedAllocator<T>&,
167 const AlignedAllocator<V>&) noexcept {
168 return false;
169}
170
171template <class T>
172using AlignedVector = std::vector<T, AlignedAllocator<T>>;
173
174// Helpers for array allocators (avoids overflow)
175namespace detail {
176
177// Returns x such that 1u << x == n (if n is a power of two).
178static inline constexpr size_t ShiftCount(size_t n) {
179 return (n <= 1) ? 0 : 1 + ShiftCount(n / 2);
180}
181
182template <typename T>
183T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) {
184 constexpr size_t size = sizeof(T);
185
186 constexpr bool is_pow2 = (size & (size - 1)) == 0;
187 constexpr size_t bits = ShiftCount(size);
188 static_assert(!is_pow2 || (1ull << bits) == size, "ShiftCount is incorrect");
189
190 const size_t bytes = is_pow2 ? items << bits : items * size;
191 const size_t check = is_pow2 ? bytes >> bits : bytes / size;
192 if (check != items) {
193 return nullptr; // overflowed
194 }
195 return static_cast<T*>(AllocateAlignedBytes(bytes, alloc_ptr, opaque_ptr));
196}
197
198} // namespace detail
199
200// Aligned memory equivalent of make_unique<T[]> for array types using the
201// custom allocators alloc/free. This function calls the constructor with the
202// passed Args... on every created item. The destructor of each element will be
203// called when the AlignedUniquePtr is destroyed.
204template <typename T, typename... Args>
206 size_t items, AllocPtr alloc, FreePtr free, void* opaque, Args&&... args) {
207 T* ptr = detail::AllocateAlignedItems<T>(items, alloc, opaque);
208 if (ptr != nullptr) {
209 for (size_t i = 0; i < items; i++) {
210 new (ptr + i) T(std::forward<Args>(args)...);
211 }
212 }
213 return AlignedUniquePtr<T[]>(ptr, AlignedDeleter(free, opaque));
214}
215
216template <typename T, typename... Args>
217AlignedUniquePtr<T[]> MakeUniqueAlignedArray(size_t items, Args&&... args) {
218 return MakeUniqueAlignedArrayWithAlloc<T, Args...>(
219 items, nullptr, nullptr, nullptr, std::forward<Args>(args)...);
220}
221
222// Custom deleter for std::unique_ptr equivalent to using free() as a deleter
223// but for aligned memory.
225 public:
226 // Pass address of this to ctor to skip deleting externally-owned memory.
227 static void DoNothing(void* /*opaque*/, void* /*aligned_pointer*/) {}
228
229 AlignedFreer() : free_(nullptr), opaque_ptr_(nullptr) {}
230 AlignedFreer(FreePtr free_ptr, void* opaque_ptr)
231 : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
232
233 template <typename T>
234 void operator()(T* aligned_pointer) const {
235 // TODO(deymo): assert that we are using a POD type T.
236 FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_);
237 }
238
239 private:
242};
243
244// Unique pointer to single POD, or (if T is U[]) an array of POD. For non POD
245// data use AlignedUniquePtr.
246template <typename T>
247using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>;
248
249// Allocate an aligned and uninitialized array of POD values as a unique_ptr.
250// Upon destruction of the unique_ptr the aligned array will be freed.
251template <typename T>
253 FreePtr free, void* opaque) {
255 detail::AllocateAlignedItems<T>(items, alloc, opaque),
256 AlignedFreer(free, opaque));
257}
258
259// Same as previous AllocateAligned(), using default allocate/free functions.
260template <typename T>
262 return AllocateAligned<T>(items, nullptr, nullptr, nullptr);
263}
264
265// A simple span containing data and size of data.
266template <typename T>
267class Span {
268 public:
269 Span() = default;
270 Span(T* data, size_t size) : size_(size), data_(data) {}
271 template <typename U>
272 Span(U u) : Span(u.data(), u.size()) {}
273 Span(std::initializer_list<const T> v) : Span(v.begin(), v.size()) {}
274
275 // Copies the contents of the initializer list to the span.
276 Span<T>& operator=(std::initializer_list<const T> v) {
277 HWY_DASSERT(size_ == v.size());
278 CopyBytes(v.begin(), data_, sizeof(T) * std::min(size_, v.size()));
279 return *this;
280 }
281
282 // Returns the size of the contained data.
283 size_t size() const { return size_; }
284
285 // Returns a pointer to the contained data.
286 T* data() { return data_; }
287 T* data() const { return data_; }
288
289 // Returns the element at index.
290 T& operator[](size_t index) const { return data_[index]; }
291
292 // Returns an iterator pointing to the first element of this span.
293 T* begin() { return data_; }
294
295 // Returns a const iterator pointing to the first element of this span.
296 constexpr const T* cbegin() const { return data_; }
297
298 // Returns an iterator pointing just beyond the last element at the
299 // end of this span.
300 T* end() { return data_ + size_; }
301
302 // Returns a const iterator pointing just beyond the last element at the
303 // end of this span.
304 constexpr const T* cend() const { return data_ + size_; }
305
306 private:
307 size_t size_ = 0;
308 T* data_ = nullptr;
309};
310
311// A multi dimensional array containing an aligned buffer.
312//
313// To maintain alignment, the innermost dimension will be padded to ensure all
314// innermost arrays are aligned.
315template <typename T, size_t axes>
317 static_assert(std::is_trivial<T>::value,
318 "AlignedNDArray can only contain trivial types");
319
320 public:
321 AlignedNDArray(AlignedNDArray&& other) = default;
323
324 // Constructs an array of the provided shape and fills it with zeros.
325 explicit AlignedNDArray(std::array<size_t, axes> shape) : shape_(shape) {
328 // Round the innermost dimension up to the number of bytes available for
329 // SIMD operations on this architecture to make sure that each innermost
330 // array is aligned from the first element.
331 memory_shape_[axes - 1] = RoundUpTo(memory_shape_[axes - 1], VectorBytes());
333 buffer_ = hwy::AllocateAligned<T>(memory_size());
334 hwy::ZeroBytes(buffer_.get(), memory_size() * sizeof(T));
335 }
336
337 // Returns a span containing the innermost array at the provided indices.
338 Span<T> operator[](std::array<const size_t, axes - 1> indices) {
339 return Span<T>(buffer_.get() + Offset(indices), sizes_[indices.size()]);
340 }
341
342 // Returns a const span containing the innermost array at the provided
343 // indices.
344 Span<const T> operator[](std::array<const size_t, axes - 1> indices) const {
345 return Span<const T>(buffer_.get() + Offset(indices),
346 sizes_[indices.size()]);
347 }
348
349 // Returns the shape of the array, which might be smaller than the allocated
350 // buffer after padding the last axis to alignment.
351 const std::array<size_t, axes>& shape() const { return shape_; }
352
353 // Returns the shape of the allocated buffer, which might be larger than the
354 // used size of the array after padding to alignment.
355 const std::array<size_t, axes>& memory_shape() const { return memory_shape_; }
356
357 // Returns the size of the array, which might be smaller than the allocated
358 // buffer after padding the last axis to alignment.
359 size_t size() const { return sizes_[0]; }
360
361 // Returns the size of the allocated buffer, which might be larger than the
362 // used size of the array after padding to alignment.
363 size_t memory_size() const { return memory_sizes_[0]; }
364
365 // Returns a pointer to the allocated buffer.
366 T* data() { return buffer_.get(); }
367
368 // Returns a const pointer to the buffer.
369 const T* data() const { return buffer_.get(); }
370
371 // Truncates the array by updating its shape.
372 //
373 // The new shape must be equal to or less than the old shape in all axes.
374 //
375 // Doesn't modify underlying memory.
376 void truncate(const std::array<size_t, axes>& new_shape) {
377#if HWY_IS_DEBUG_BUILD
378 for (size_t axis_index = 0; axis_index < axes; ++axis_index) {
379 HWY_ASSERT(new_shape[axis_index] <= shape_[axis_index]);
380 }
381#endif
382 shape_ = new_shape;
384 }
385
386 private:
387 std::array<size_t, axes> shape_;
388 std::array<size_t, axes> memory_shape_;
389 std::array<size_t, axes + 1> sizes_;
390 std::array<size_t, axes + 1> memory_sizes_;
392
393 // Computes offset in the buffer based on the provided indices.
394 size_t Offset(std::array<const size_t, axes - 1> indices) const {
395 size_t offset = 0;
396 size_t shape_index = 0;
397 for (const size_t axis_index : indices) {
398 offset += memory_sizes_[shape_index + 1] * axis_index;
399 shape_index++;
400 }
401 return offset;
402 }
403
404 // Computes the sizes of all sub arrays based on the sizes of each axis.
405 //
406 // Does this by multiplying the size of each axis with the previous one in
407 // reverse order, starting with the conceptual axis of size 1 containing the
408 // actual elements in the array.
409 static std::array<size_t, axes + 1> ComputeSizes(
410 std::array<size_t, axes> shape) {
411 std::array<size_t, axes + 1> sizes;
412 size_t axis = shape.size();
413 sizes[axis] = 1;
414 while (axis > 0) {
415 --axis;
416 sizes[axis] = sizes[axis + 1] * shape[axis];
417 }
418 return sizes;
419 }
420};
421
422} // namespace hwy
423#endif // HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
#define HWY_ALIGNMENT
Definition aligned_allocator.h:41
#define HWY_API
Definition base.h:171
#define HWY_DASSERT(condition)
Definition base.h:290
#define HWY_MAYBE_UNUSED
Definition base.h:113
#define HWY_ASSERT(condition)
Definition base.h:237
Definition aligned_allocator.h:72
void * opaque_ptr_
Definition aligned_allocator.h:104
void operator()(T *aligned_pointer) const
Definition aligned_allocator.h:79
AlignedDeleter(FreePtr free_ptr, void *opaque_ptr)
Definition aligned_allocator.h:75
AlignedDeleter()
Definition aligned_allocator.h:74
void(*)(void *t_ptr, size_t t_size) ArrayDeleter
Definition aligned_allocator.h:96
FreePtr free_
Definition aligned_allocator.h:103
static HWY_DLLEXPORT void DeleteAlignedArray(void *aligned_pointer, FreePtr free_ptr, void *opaque_ptr, ArrayDeleter deleter)
static void TypedArrayDeleter(void *ptr, size_t size_in_bytes)
Definition aligned_allocator.h:86
Definition aligned_allocator.h:224
AlignedFreer()
Definition aligned_allocator.h:229
void operator()(T *aligned_pointer) const
Definition aligned_allocator.h:234
AlignedFreer(FreePtr free_ptr, void *opaque_ptr)
Definition aligned_allocator.h:230
void * opaque_ptr_
Definition aligned_allocator.h:241
FreePtr free_
Definition aligned_allocator.h:240
static void DoNothing(void *, void *)
Definition aligned_allocator.h:227
Definition aligned_allocator.h:316
AlignedNDArray & operator=(AlignedNDArray &&other)=default
AlignedNDArray(std::array< size_t, axes > shape)
Definition aligned_allocator.h:325
Span< T > operator[](std::array< const size_t, axes - 1 > indices)
Definition aligned_allocator.h:338
std::array< size_t, axes+1 > memory_sizes_
Definition aligned_allocator.h:390
const T * data() const
Definition aligned_allocator.h:369
size_t size() const
Definition aligned_allocator.h:359
hwy::AlignedFreeUniquePtr< T[]> buffer_
Definition aligned_allocator.h:391
const std::array< size_t, axes > & shape() const
Definition aligned_allocator.h:351
Span< const T > operator[](std::array< const size_t, axes - 1 > indices) const
Definition aligned_allocator.h:344
T * data()
Definition aligned_allocator.h:366
std::array< size_t, axes+1 > sizes_
Definition aligned_allocator.h:389
void truncate(const std::array< size_t, axes > &new_shape)
Definition aligned_allocator.h:376
AlignedNDArray(AlignedNDArray &&other)=default
const std::array< size_t, axes > & memory_shape() const
Definition aligned_allocator.h:355
static std::array< size_t, axes+1 > ComputeSizes(std::array< size_t, axes > shape)
Definition aligned_allocator.h:409
std::array< size_t, axes > memory_shape_
Definition aligned_allocator.h:388
size_t memory_size() const
Definition aligned_allocator.h:363
std::array< size_t, axes > shape_
Definition aligned_allocator.h:387
size_t Offset(std::array< const size_t, axes - 1 > indices) const
Definition aligned_allocator.h:394
Definition aligned_allocator.h:267
T & operator[](size_t index) const
Definition aligned_allocator.h:290
size_t size() const
Definition aligned_allocator.h:283
T * begin()
Definition aligned_allocator.h:293
Span(U u)
Definition aligned_allocator.h:272
T * data_
Definition aligned_allocator.h:308
Span()=default
Span(T *data, size_t size)
Definition aligned_allocator.h:270
Span< T > & operator=(std::initializer_list< const T > v)
Definition aligned_allocator.h:276
constexpr const T * cbegin() const
Definition aligned_allocator.h:296
Span(std::initializer_list< const T > v)
Definition aligned_allocator.h:273
constexpr const T * cend() const
Definition aligned_allocator.h:304
T * data()
Definition aligned_allocator.h:286
size_t size_
Definition aligned_allocator.h:307
T * end()
Definition aligned_allocator.h:300
T * data() const
Definition aligned_allocator.h:287
#define HWY_DLLEXPORT
Definition highway_export.h:13
static constexpr size_t ShiftCount(size_t n)
Definition aligned_allocator.h:178
T * AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void *opaque_ptr)
Definition aligned_allocator.h:183
Definition abort.h:8
HWY_API void CopyBytes(const From *from, To *to)
Definition base.h:327
HWY_DLLEXPORT size_t VectorBytes()
std::unique_ptr< T, AlignedFreer > AlignedFreeUniquePtr
Definition aligned_allocator.h:247
HWY_API void ZeroBytes(To *to)
Definition base.h:352
std::unique_ptr< T, AlignedDeleter > AlignedUniquePtr
Definition aligned_allocator.h:111
constexpr bool operator!=(const AlignedAllocator< T > &, const AlignedAllocator< V > &) noexcept
Definition aligned_allocator.h:166
AlignedUniquePtr< T[]> MakeUniqueAlignedArrayWithAlloc(size_t items, AllocPtr alloc, FreePtr free, void *opaque, Args &&... args)
Definition aligned_allocator.h:205
constexpr bool operator==(const AlignedAllocator< T > &, const AlignedAllocator< V > &) noexcept
Definition aligned_allocator.h:160
void *(*)(void *opaque, size_t bytes) AllocPtr
Definition aligned_allocator.h:50
void(*)(void *opaque, void *memory) FreePtr
Definition aligned_allocator.h:51
HWY_DLLEXPORT void * AllocateAlignedBytes(size_t payload_size, AllocPtr alloc_ptr=nullptr, void *opaque_ptr=nullptr)
AlignedUniquePtr< T[]> MakeUniqueAlignedArray(size_t items, Args &&... args)
Definition aligned_allocator.h:217
AlignedFreeUniquePtr< T[]> AllocateAligned(const size_t items, AllocPtr alloc, FreePtr free, void *opaque)
Definition aligned_allocator.h:252
HWY_API constexpr bool IsAligned(T *ptr, size_t align=HWY_ALIGNMENT)
Definition aligned_allocator.h:44
HWY_DLLEXPORT void FreeAlignedBytes(const void *aligned_pointer, FreePtr free_ptr, void *opaque_ptr)
AlignedUniquePtr< T > MakeUniqueAlignedWithAlloc(AllocPtr alloc, FreePtr free, void *opaque, Args &&... args)
Definition aligned_allocator.h:118
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition base.h:2490
AlignedUniquePtr< T > MakeUniqueAligned(Args &&... args)
Definition aligned_allocator.h:128
std::vector< T, AlignedAllocator< T > > AlignedVector
Definition aligned_allocator.h:172
Definition aligned_allocator.h:135
void deallocate(value_type *p, HWY_MAYBE_UNUSED V n)
Definition aligned_allocator.h:154
T value_type
Definition aligned_allocator.h:136
value_type * allocate(V n)
Definition aligned_allocator.h:144
AlignedAllocator()=default
AlignedAllocator(const AlignedAllocator< V > &) noexcept
Definition aligned_allocator.h:141