16#if defined(HIGHWAY_HWY_CONTRIB_RANDOM_RANDOM_H_) == \
17 defined(HWY_TARGET_TOGGLE)
18#ifdef HIGHWAY_HWY_CONTRIB_RANDOM_RANDOM_H_
19#undef HIGHWAY_HWY_CONTRIB_RANDOM_RANDOM_H_
21#define HIGHWAY_HWY_CONTRIB_RANDOM_RANDOM_H_
41#if __cpp_hex_float > 201603L
42constexpr double kMulConst = 0x1.0p-53;
44constexpr double kMulConst =
45 0.00000000000000011102230246251565404236316680908203125;
50constexpr std::uint64_t kJump[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c,
51 0xa9582618e03fc9aa, 0x39abdc4529b1661c};
53constexpr std::uint64_t kLongJump[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3,
54 0x77710069854ee241, 0x39109bb02acbe635};
59 constexpr explicit SplitMix64(
const std::uint64_t state) noexcept
63 std::uint64_t z = (
state_ += 0x9e3779b97f4a7c15);
64 z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
65 z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
78 for (
auto &element :
state_) {
79 element = splitMix64();
84 const std::uint64_t thread_id) noexcept
86 for (
auto i = UINT64_C(0); i < thread_id; ++i) {
95 return static_cast<double>(
Next() >> 11) * kMulConst;
104 std::array<std::uint64_t, 4> state)
noexcept {
111 static constexpr std::uint64_t
StateSize() noexcept {
return 4; }
127 static constexpr std::uint64_t
Rotl(
const std::uint64_t x,
int k)
noexcept {
128 return (x << k) | (x >> (64 - k));
133 const std::uint64_t t =
state_[1] << 17;
148 std::uint64_t s0 = 0;
149 std::uint64_t s1 = 0;
150 std::uint64_t s2 = 0;
151 std::uint64_t s3 = 0;
153 for (
const std::uint64_t i : jumpArray)
154 for (std::uint_fast8_t b = 0; b < 64; b++) {
155 if (i & std::uint64_t{1UL} << b) {
182 const std::uint64_t threadNumber = 0)
185 streams{state_.
shape().back()} {
186 internal::Xoshiro xoshiro{seed};
188 for (std::uint64_t i = 0; i < threadNumber; ++i) {
192 for (
size_t i = 0UL; i < streams; ++i) {
193 const auto state = xoshiro.GetState();
195 state_[{j}][i] = state[j];
206 auto s0 =
Load(tag, state_[{0}].data());
207 auto s1 =
Load(tag, state_[{1}].data());
208 auto s2 =
Load(tag, state_[{2}].data());
209 auto s3 =
Load(tag, state_[{3}].data());
210 for (std::uint64_t i = 0; i < n; i +=
Lanes(tag)) {
211 const auto next = Update(s0, s1, s2, s3);
212 Store(next, tag, result.data() + i);
214 Store(s0, tag, state_[{0}].data());
215 Store(s1, tag, state_[{1}].data());
216 Store(s2, tag, state_[{2}].data());
217 Store(s3, tag, state_[{3}].data());
221 template <std::uint64_t N>
225 auto s0 =
Load(tag, state_[{0}].data());
226 auto s1 =
Load(tag, state_[{1}].data());
227 auto s2 =
Load(tag, state_[{2}].data());
228 auto s3 =
Load(tag, state_[{3}].data());
229 for (std::uint64_t i = 0; i < N; i +=
Lanes(tag)) {
230 const auto next = Update(s0, s1, s2, s3);
231 Store(next, tag, result.data() + i);
233 Store(s0, tag, state_[{0}].data());
234 Store(s1, tag, state_[{1}].data());
235 Store(s2, tag, state_[{2}].data());
236 Store(s3, tag, state_[{3}].data());
250 const auto MUL_VALUE =
Set(real_tag, internal::kMulConst);
251 const auto bits = ShiftRight<11>(Next());
252 const auto real =
ConvertTo(real_tag, bits);
253 return Mul(real, MUL_VALUE);
256 AlignedVector<double> Uniform(
const std::size_t n) {
257 AlignedVector<double> result(n);
258 const ScalableTag<std::uint64_t> tag{};
259 const ScalableTag<double> real_tag{};
260 const auto MUL_VALUE =
Set(real_tag, internal::kMulConst);
262 auto s0 =
Load(tag, state_[{0}].data());
263 auto s1 =
Load(tag, state_[{1}].data());
264 auto s2 =
Load(tag, state_[{2}].data());
265 auto s3 =
Load(tag, state_[{3}].data());
267 for (std::uint64_t i = 0; i < n; i +=
Lanes(real_tag)) {
268 const auto next = Update(s0, s1, s2, s3);
269 const auto bits = ShiftRight<11>(next);
270 const auto real =
ConvertTo(real_tag, bits);
271 const auto uniform =
Mul(real, MUL_VALUE);
272 Store(uniform, real_tag, result.data() + i);
275 Store(s0, tag, state_[{0}].data());
276 Store(s1, tag, state_[{1}].data());
277 Store(s2, tag, state_[{2}].data());
278 Store(s3, tag, state_[{3}].data());
282 template <std::uint64_t N>
283 std::array<double, N> Uniform() noexcept {
285 const ScalableTag<std::uint64_t> tag{};
286 const ScalableTag<double> real_tag{};
287 const auto MUL_VALUE =
Set(real_tag, internal::kMulConst);
289 auto s0 =
Load(tag, state_[{0}].data());
290 auto s1 =
Load(tag, state_[{1}].data());
291 auto s2 =
Load(tag, state_[{2}].data());
292 auto s3 =
Load(tag, state_[{3}].data());
294 for (std::uint64_t i = 0; i < N; i +=
Lanes(real_tag)) {
295 const auto next = Update(s0, s1, s2, s3);
296 const auto bits = ShiftRight<11>(next);
297 const auto real =
ConvertTo(real_tag, bits);
298 const auto uniform =
Mul(real, MUL_VALUE);
299 Store(uniform, real_tag, result.data() + i);
302 Store(s0, tag, state_[{0}].data());
303 Store(s1, tag, state_[{1}].data());
304 Store(s2, tag, state_[{2}].data());
305 Store(s3, tag, state_[{3}].data());
317 const auto result =
Add(RotateRight<41>(
Add(s0, s3)), s0);
318 const auto t = ShiftLeft<17>(s1);
324 s3 = RotateRight<19>(s3);
330 auto s0 =
Load(tag, state_[{0}].
data());
331 auto s1 =
Load(tag, state_[{1}].
data());
332 auto s2 =
Load(tag, state_[{2}].
data());
333 auto s3 =
Load(tag, state_[{3}].
data());
334 auto result = Update(s0, s1, s2, s3);
343template <std::uint64_t size = 1024>
349 return (std::numeric_limits<result_type>::min)();
353 return (std::numeric_limits<result_type>::max)();
358 : generator_{seed, threadNumber},
359 cache_{generator_.operator()<size>()},
364 cache_ = std::move(generator_.operator()<size>());
367 return cache_[index_++];
375 static_assert((size & (size - 1)) == 0 && size != 0,
376 "only power of 2 are supported");
#define HWY_ALIGNMENT
Definition aligned_allocator.h:41
#define HWY_CXX14_CONSTEXPR
Definition base.h:304
#define HWY_INLINE
Definition base.h:101
#define HWY_CXX17_CONSTEXPR
Definition base.h:299
#define HWY_UNLIKELY(expr)
Definition base.h:107
const std::array< size_t, axes > & shape() const
Definition aligned_allocator.h:351
T * data()
Definition aligned_allocator.h:366
Definition random-inl.h:344
std::size_t index_
Definition random-inl.h:373
std::uint64_t result_type
Definition random-inl.h:346
result_type operator()() noexcept
Definition random-inl.h:362
CachedXoshiro(const result_type seed, const result_type threadNumber=0)
Definition random-inl.h:356
VectorXoshiro generator_
Definition random-inl.h:371
Definition random-inl.h:173
const std::uint64_t streams
Definition random-inl.h:313
const StateType & GetState() const
Definition random-inl.h:244
static HWY_INLINE VU64 Update(VU64 &s0, VU64 &s1, VU64 &s2, VU64 &s3) noexcept
Definition random-inl.h:315
AlignedVector< std::uint64_t > operator()(const std::size_t n)
Definition random-inl.h:203
StateType state_
Definition random-inl.h:312
Vec< ScalableTag< std::uint64_t > > VU64
Definition random-inl.h:175
HWY_INLINE VU64 Next() noexcept
Definition random-inl.h:328
HWY_INLINE VU64 operator()() noexcept
Definition random-inl.h:201
VectorXoshiro(const std::uint64_t seed, const std::uint64_t threadNumber=0)
Definition random-inl.h:181
std::array< std::uint64_t, N > operator()() noexcept
Definition random-inl.h:222
std::uint64_t StateSize() const noexcept
Definition random-inl.h:240
Definition random-inl.h:57
constexpr SplitMix64(const std::uint64_t state) noexcept
Definition random-inl.h:59
HWY_CXX14_CONSTEXPR std::uint64_t operator()()
Definition random-inl.h:62
std::uint64_t state_
Definition random-inl.h:70
Definition random-inl.h:73
std::uint64_t state_[4]
Definition random-inl.h:125
HWY_CXX14_CONSTEXPR std::uint64_t Next() noexcept
Definition random-inl.h:131
HWY_CXX14_CONSTEXPR std::uint64_t operator()() noexcept
Definition random-inl.h:91
HWY_CXX14_CONSTEXPR std::array< std::uint64_t, 4 > GetState() const
Definition random-inl.h:99
HWY_CXX14_CONSTEXPR Xoshiro(const std::uint64_t seed) noexcept
Definition random-inl.h:75
static constexpr std::uint64_t StateSize() noexcept
Definition random-inl.h:111
static constexpr std::uint64_t Rotl(const std::uint64_t x, int k) noexcept
Definition random-inl.h:127
HWY_CXX14_CONSTEXPR void Jump() noexcept
Definition random-inl.h:116
HWY_CXX14_CONSTEXPR void LongJump() noexcept
Definition random-inl.h:122
HWY_CXX14_CONSTEXPR void Jump(const std::uint64_t(&jumpArray)[4]) noexcept
Definition random-inl.h:147
HWY_CXX17_CONSTEXPR void SetState(std::array< std::uint64_t, 4 > state) noexcept
Definition random-inl.h:103
HWY_CXX14_CONSTEXPR Xoshiro(const std::uint64_t seed, const std::uint64_t thread_id) noexcept
Definition random-inl.h:83
HWY_API void Store(VFromD< D > v, D d, TFromD< D > *HWY_RESTRICT aligned)
Definition arm_neon-inl.h:3911
HWY_API Vec128< float > ConvertTo(D, Vec128< int32_t > v)
Definition arm_neon-inl.h:3971
HWY_API VFromD< D > Load(D d, const TFromD< D > *HWY_RESTRICT p)
Definition arm_neon-inl.h:3664
HWY_API V Add(V a, V b)
Definition generic_ops-inl.h:7300
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition arm_neon-inl.h:2739
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition ops/shared-inl.h:367
HWY_INLINE Vec128< TFromD< D > > Set(D, T t)
Definition arm_neon-inl.h:931
decltype(Zero(D())) Vec
Definition generic_ops-inl.h:46
HWY_API size_t Lanes(D)
Definition rvv-inl.h:598
HWY_API V Mul(V a, V b)
Definition generic_ops-inl.h:7309
std::vector< T, AlignedAllocator< T > > AlignedVector
Definition aligned_allocator.h:172
#define HWY_NAMESPACE
Definition set_macros-inl.h:166