Grok 12.0.1
transform-inl.h
Go to the documentation of this file.
1// Copyright 2022 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Per-target include guard
17#if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
20#undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
21#else
22#define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
23#endif
24
25#include <stddef.h>
26
27#include "hwy/highway.h"
28
30namespace hwy {
31namespace HWY_NAMESPACE {
32
33// These functions avoid having to write a loop plus remainder handling in the
34// (unfortunately still common) case where arrays are not aligned/padded. If the
35// inputs are known to be aligned/padded, it is more efficient to write a single
36// loop using Load(). We do not provide a TransformAlignedPadded because it
37// would be more verbose than such a loop.
38//
39// Func is either a functor with a templated operator()(d, v[, v1[, v2]]), or a
40// generic lambda if using C++14. The d argument is the same as was passed to
41// the Generate etc. functions. Due to apparent limitations of Clang, it is
42// currently necessary to add HWY_ATTR before the opening { of the lambda to
43// avoid errors about "always_inline function .. requires target".
44//
45// We do not check HWY_MEM_OPS_MIGHT_FAULT because LoadN/StoreN do not fault.
46
47// Fills `out[0, count)` with the vectors returned by `func(d, index_vec)`,
48// where `index_vec` is `Vec<RebindToUnsigned<D>>`. On the first call to `func`,
49// the value of its lane i is i, and increases by `Lanes(d)` after every call.
50// Note that some of these indices may be `>= count`, but the elements that
51// `func` returns in those lanes will not be written to `out`.
52template <class D, class Func, typename T = TFromD<D>>
53void Generate(D d, T* HWY_RESTRICT out, size_t count, const Func& func) {
54 const RebindToUnsigned<D> du;
55 using TU = TFromD<decltype(du)>;
56 const size_t N = Lanes(d);
57
58 size_t idx = 0;
59 Vec<decltype(du)> vidx = Iota(du, 0);
60 if (count >= N) {
61 for (; idx <= count - N; idx += N) {
62 StoreU(func(d, vidx), d, out + idx);
63 vidx = Add(vidx, Set(du, static_cast<TU>(N)));
64 }
65 }
66
67 // `count` was a multiple of the vector length `N`: already done.
68 if (HWY_UNLIKELY(idx == count)) return;
69
70 const size_t remaining = count - idx;
71 HWY_DASSERT(0 != remaining && remaining < N);
72 StoreN(func(d, vidx), d, out + idx, remaining);
73}
74
75// Calls `func(d, v)` for each input vector; out of bound lanes with index i >=
76// `count` are instead taken from `no[i % Lanes(d)]`.
77template <class D, class Func, typename T = TFromD<D>>
78void Foreach(D d, const T* HWY_RESTRICT in, const size_t count, const Vec<D> no,
79 const Func& func) {
80 const size_t N = Lanes(d);
81
82 size_t idx = 0;
83 if (count >= N) {
84 for (; idx <= count - N; idx += N) {
85 const Vec<D> v = LoadU(d, in + idx);
86 func(d, v);
87 }
88 }
89
90 // `count` was a multiple of the vector length `N`: already done.
91 if (HWY_UNLIKELY(idx == count)) return;
92
93 const size_t remaining = count - idx;
94 HWY_DASSERT(0 != remaining && remaining < N);
95 const Vec<D> v = LoadNOr(no, d, in + idx, remaining);
96 func(d, v);
97}
98
99// Replaces `inout[idx]` with `func(d, inout[idx])`. Example usage: multiplying
100// array elements by a constant.
101template <class D, class Func, typename T = TFromD<D>>
102void Transform(D d, T* HWY_RESTRICT inout, size_t count, const Func& func) {
103 const size_t N = Lanes(d);
104
105 size_t idx = 0;
106 if (count >= N) {
107 for (; idx <= count - N; idx += N) {
108 const Vec<D> v = LoadU(d, inout + idx);
109 StoreU(func(d, v), d, inout + idx);
110 }
111 }
112
113 // `count` was a multiple of the vector length `N`: already done.
114 if (HWY_UNLIKELY(idx == count)) return;
115
116 const size_t remaining = count - idx;
117 HWY_DASSERT(0 != remaining && remaining < N);
118 const Vec<D> v = LoadN(d, inout + idx, remaining);
119 StoreN(func(d, v), d, inout + idx, remaining);
120}
121
122// Replaces `inout[idx]` with `func(d, inout[idx], in1[idx])`. Example usage:
123// multiplying array elements by those of another array.
124template <class D, class Func, typename T = TFromD<D>>
125void Transform1(D d, T* HWY_RESTRICT inout, size_t count,
126 const T* HWY_RESTRICT in1, const Func& func) {
127 const size_t N = Lanes(d);
128
129 size_t idx = 0;
130 if (count >= N) {
131 for (; idx <= count - N; idx += N) {
132 const Vec<D> v = LoadU(d, inout + idx);
133 const Vec<D> v1 = LoadU(d, in1 + idx);
134 StoreU(func(d, v, v1), d, inout + idx);
135 }
136 }
137
138 // `count` was a multiple of the vector length `N`: already done.
139 if (HWY_UNLIKELY(idx == count)) return;
140
141 const size_t remaining = count - idx;
142 HWY_DASSERT(0 != remaining && remaining < N);
143 const Vec<D> v = LoadN(d, inout + idx, remaining);
144 const Vec<D> v1 = LoadN(d, in1 + idx, remaining);
145 StoreN(func(d, v, v1), d, inout + idx, remaining);
146}
147
148// Replaces `inout[idx]` with `func(d, inout[idx], in1[idx], in2[idx])`. Example
149// usage: FMA of elements from three arrays, stored into the first array.
150template <class D, class Func, typename T = TFromD<D>>
151void Transform2(D d, T* HWY_RESTRICT inout, size_t count,
152 const T* HWY_RESTRICT in1, const T* HWY_RESTRICT in2,
153 const Func& func) {
154 const size_t N = Lanes(d);
155
156 size_t idx = 0;
157 if (count >= N) {
158 for (; idx <= count - N; idx += N) {
159 const Vec<D> v = LoadU(d, inout + idx);
160 const Vec<D> v1 = LoadU(d, in1 + idx);
161 const Vec<D> v2 = LoadU(d, in2 + idx);
162 StoreU(func(d, v, v1, v2), d, inout + idx);
163 }
164 }
165
166 // `count` was a multiple of the vector length `N`: already done.
167 if (HWY_UNLIKELY(idx == count)) return;
168
169 const size_t remaining = count - idx;
170 HWY_DASSERT(0 != remaining && remaining < N);
171 const Vec<D> v = LoadN(d, inout + idx, remaining);
172 const Vec<D> v1 = LoadN(d, in1 + idx, remaining);
173 const Vec<D> v2 = LoadN(d, in2 + idx, remaining);
174 StoreN(func(d, v, v1, v2), d, inout + idx, remaining);
175}
176
177template <class D, typename T = TFromD<D>>
178void Replace(D d, T* HWY_RESTRICT inout, size_t count, T new_t, T old_t) {
179 const size_t N = Lanes(d);
180 const Vec<D> old_v = Set(d, old_t);
181 const Vec<D> new_v = Set(d, new_t);
182
183 size_t idx = 0;
184 if (count >= N) {
185 for (; idx <= count - N; idx += N) {
186 Vec<D> v = LoadU(d, inout + idx);
187 StoreU(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx);
188 }
189 }
190
191 // `count` was a multiple of the vector length `N`: already done.
192 if (HWY_UNLIKELY(idx == count)) return;
193
194 const size_t remaining = count - idx;
195 HWY_DASSERT(0 != remaining && remaining < N);
196 const Vec<D> v = LoadN(d, inout + idx, remaining);
197 StoreN(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx, remaining);
198}
199
200template <class D, class Func, typename T = TFromD<D>>
201void ReplaceIf(D d, T* HWY_RESTRICT inout, size_t count, T new_t,
202 const Func& func) {
203 const size_t N = Lanes(d);
204 const Vec<D> new_v = Set(d, new_t);
205
206 size_t idx = 0;
207 if (count >= N) {
208 for (; idx <= count - N; idx += N) {
209 Vec<D> v = LoadU(d, inout + idx);
210 StoreU(IfThenElse(func(d, v), new_v, v), d, inout + idx);
211 }
212 }
213
214 // `count` was a multiple of the vector length `N`: already done.
215 if (HWY_UNLIKELY(idx == count)) return;
216
217 const size_t remaining = count - idx;
218 HWY_DASSERT(0 != remaining && remaining < N);
219 const Vec<D> v = LoadN(d, inout + idx, remaining);
220 StoreN(IfThenElse(func(d, v), new_v, v), d, inout + idx, remaining);
221}
222
223// NOLINTNEXTLINE(google-readability-namespace-comments)
224} // namespace HWY_NAMESPACE
225} // namespace hwy
227
228#endif // HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
#define HWY_RESTRICT
Definition base.h:95
#define HWY_DASSERT(condition)
Definition base.h:290
#define HWY_UNLIKELY(expr)
Definition base.h:107
void Generate(D d, T *HWY_RESTRICT out, size_t count, const Func &func)
Definition transform-inl.h:53
void ReplaceIf(D d, T *HWY_RESTRICT inout, size_t count, T new_t, const Func &func)
Definition transform-inl.h:201
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition generic_ops-inl.h:7331
D d
Definition arm_sve-inl.h:1915
HWY_API VFromD< D > LoadNOr(VFromD< D > no, D d, const TFromD< D > *HWY_RESTRICT p, size_t max_lanes_to_load)
Definition emu128-inl.h:1362
HWY_API void StoreN(VFromD< D > v, D d, TFromD< D > *HWY_RESTRICT p, size_t max_lanes_to_store)
Definition emu128-inl.h:1398
HWY_API V IfThenElse(MFromD< DFromV< V > > mask, V yes, V no)
Definition arm_neon-inl.h:2992
HWY_API Vec128< uint8_t > LoadU(D, const uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3442
void Transform2(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const T *HWY_RESTRICT in2, const Func &func)
Definition transform-inl.h:151
HWY_API void StoreU(Vec128< uint8_t > v, D, uint8_t *HWY_RESTRICT unaligned)
Definition arm_neon-inl.h:3689
typename D::T TFromD
Definition ops/shared-inl.h:426
HWY_API V Add(V a, V b)
Definition generic_ops-inl.h:7300
void Replace(D d, T *HWY_RESTRICT inout, size_t count, T new_t, T old_t)
Definition transform-inl.h:178
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition ops/shared-inl.h:465
void Transform(D d, T *HWY_RESTRICT inout, size_t count, const Func &func)
Definition transform-inl.h:102
HWY_API VFromD< D > Iota(D d, const T2 first)
Definition arm_neon-inl.h:1297
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
HWY_API VFromD< D > LoadN(D d, const TFromD< D > *HWY_RESTRICT p, size_t max_lanes_to_load)
Definition emu128-inl.h:1352
void Foreach(D d, const T *HWY_RESTRICT in, const size_t count, const Vec< D > no, const Func &func)
Definition transform-inl.h:78
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
void Transform1(D d, T *HWY_RESTRICT inout, size_t count, const T *HWY_RESTRICT in1, const Func &func)
Definition transform-inl.h:125
Definition abort.h:8
FuncOutput(*)(const void *, FuncInput) Func
Definition nanobenchmark.h:87
#define HWY_NAMESPACE
Definition set_macros-inl.h:166
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()