Grok 12.0.1
futex.h
Go to the documentation of this file.
1// Copyright 2024 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_
17#define HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_
18
19// Keyed event (futex): kernel queue of blocked threads, identified by the
20// address of an atomic u32 called `current` within the same process (do NOT
21// use with shared-memory mappings).
22//
// Futex equivalents: https://outerproduct.net/futex-dictionary.html; we
// support Linux/Emscripten/Apple/Windows and C++20 std::atomic::wait, plus a
// sleep-loop fallback (std::this_thread::sleep_for) for pre-C++20 builds
// without a native futex.
26
27#include <atomic>
28#include <climits> // INT_MAX
29
30#include "hwy/base.h"
31
32#if HWY_ARCH_WASM
33#include <emscripten/threading.h>
34#include <math.h> // INFINITY
35
36#elif HWY_OS_LINUX
37#include <errno.h> // IWYU pragma: keep
38#include <linux/futex.h> // FUTEX_*
39#include <pthread.h>
40#include <sys/syscall.h> // SYS_*
41#include <unistd.h>
42// Android may not declare these:
43#ifndef SYS_futex
44#ifdef SYS_futex_time64 // 32-bit with 64-bit time_t
45#define SYS_futex SYS_futex_time64
46#else
47#define SYS_futex __NR_futex
48#endif // SYS_futex_time64
49#endif // SYS_futex
50#ifndef FUTEX_WAIT_PRIVATE
51#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | 128)
52#endif
53#ifndef FUTEX_WAKE_PRIVATE
54#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | 128)
55#endif
56
57#elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
58// These are private APIs, so add an opt-out.
59extern "C" {
60int __ulock_wait(uint32_t op, void* address, uint64_t val, uint32_t max_us);
61int __ulock_wake(uint32_t op, void* address, uint64_t zero);
62} // extern "C"
63#define UL_COMPARE_AND_WAIT 1
64#define ULF_WAKE_ALL 0x00000100
65
66#elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
67// WakeByAddressAll requires Windows 8, so add an opt-out.
68#include <windows.h>
69#pragma comment(lib, "synchronization.lib")
70
71#elif HWY_CXX_LANG < 202002L // NOT C++20, which has native support
72#define HWY_FUTEX_SLEEP
#include <chrono>  // NOLINT (sleep_for)
#include <thread>  // NOLINT (std::this_thread::sleep_for)
74#endif
75
76namespace hwy {
77
78// Waits until `current != prev` and returns the new value. May return
79// immediately if `current` already changed, or after blocking and waking.
80static inline uint32_t BlockUntilDifferent(
81 const uint32_t prev, const std::atomic<uint32_t>& current) {
82 const auto acq = std::memory_order_acquire;
83
84#if HWY_ARCH_WASM
85 // It is always safe to cast to void.
86 volatile void* address =
87 const_cast<volatile void*>(static_cast<const volatile void*>(&current));
88 const double max_ms = INFINITY;
89 for (;;) {
90 const uint32_t next = current.load(acq);
91 if (next != prev) return next;
92 const int ret = emscripten_futex_wait(address, prev, max_ms);
93 HWY_DASSERT(ret >= 0);
94 (void)ret;
95 }
96
97#elif HWY_OS_LINUX
98 // Safe to cast because std::atomic is a standard layout type.
99 const uint32_t* address = reinterpret_cast<const uint32_t*>(&current);
100 // _PRIVATE requires this only be used in the same process, and avoids
101 // virtual->physical lookups and atomic reference counting.
102 const int op = FUTEX_WAIT_PRIVATE;
103 for (;;) {
104 const uint32_t next = current.load(acq);
105 if (next != prev) return next;
106 // timeout=null may prevent interrupts via signal. No lvalue because
107 // the timespec type is only standardized since C++17 or C11.
108 const auto ret = syscall(SYS_futex, address, op, prev, nullptr, nullptr, 0);
109 if (ret == -1) {
110 HWY_DASSERT(errno == EAGAIN); // otherwise an actual error
111 }
112 }
113
114#elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
115 // It is always safe to cast to void.
116 volatile void* address =
117 const_cast<volatile void*>(static_cast<const volatile void*>(&current));
118 // API is not const-correct, but only loads from the pointer.
119 PVOID pprev = const_cast<void*>(static_cast<const void*>(&prev));
120 const DWORD max_ms = INFINITE;
121 for (;;) {
122 const uint32_t next = current.load(acq);
123 if (next != prev) return next;
124 const BOOL ok = WaitOnAddress(address, pprev, sizeof(prev), max_ms);
125 HWY_DASSERT(ok);
126 (void)ok;
127 }
128
129#elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
130 // It is always safe to cast to void.
131 void* address = const_cast<void*>(static_cast<const void*>(&current));
132 for (;;) {
133 const uint32_t next = current.load(acq);
134 if (next != prev) return next;
135 __ulock_wait(UL_COMPARE_AND_WAIT, address, prev, 0);
136 }
137
138#elif defined(HWY_FUTEX_SLEEP)
139 for (;;) {
140 const uint32_t next = current.load(acq);
141 if (next != prev) return next;
142 std::this_thread::sleep_for(std::chrono::microseconds(2));
143 }
144
145#elif HWY_CXX_LANG >= 202002L
146 current.wait(prev, acq); // No spurious wakeup.
147 const uint32_t next = current.load(acq);
148 HWY_DASSERT(next != prev);
149 return next;
150
151#else
152#error "Logic error, should have reached HWY_FUTEX_SLEEP"
153#endif // HWY_OS_*
154} // BlockUntilDifferent
155
156// Wakes all threads, if any, that are waiting because they called
157// `BlockUntilDifferent` with the same `current`.
158static inline void WakeAll(std::atomic<uint32_t>& current) {
159#if HWY_ARCH_WASM
160 // It is always safe to cast to void.
161 volatile void* address = static_cast<volatile void*>(&current);
162 const int max_to_wake = INT_MAX; // actually signed
163 const int ret = emscripten_futex_wake(address, max_to_wake);
164 HWY_DASSERT(ret >= 0);
165 (void)ret;
166
167#elif HWY_OS_LINUX
168 // Safe to cast because std::atomic is a standard layout type.
169 uint32_t* address = reinterpret_cast<uint32_t*>(&current);
170 const int max_to_wake = INT_MAX; // actually signed
171 const auto ret = syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, max_to_wake,
172 nullptr, nullptr, 0);
173 HWY_DASSERT(ret >= 0); // number woken
174 (void)ret;
175
176#elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
177 // It is always safe to cast to void.
178 void* address = static_cast<void*>(&current);
179 WakeByAddressAll(address);
180
181#elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
182 // It is always safe to cast to void.
183 void* address = static_cast<void*>(&current);
184 __ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, address, 0);
185
186#elif defined(HWY_FUTEX_SLEEP)
187 // Sleep loop does not require wakeup.
188
189#elif HWY_CXX_LANG >= 202002L
190 current.notify_all();
191
192#else
193#error "Logic error, should have reached HWY_FUTEX_SLEEP"
194#endif
195} // WakeAll
196
197} // namespace hwy
198
199#endif // HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_
#define HWY_DASSERT(condition)
Definition base.h:290
Definition abort.h:8
static void WakeAll(std::atomic< uint32_t > &current)
Definition futex.h:158
static uint32_t BlockUntilDifferent(const uint32_t prev, const std::atomic< uint32_t > &current)
Definition futex.h:80