Grok 12.0.1
set_macros-inl.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
3// SPDX-License-Identifier: Apache-2.0
4// SPDX-License-Identifier: BSD-3-Clause
5//
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9//
10// http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18// Sets macros based on HWY_TARGET.
19
// Per-target include guard. foreach_target toggles it, which is why the name
// deliberately lacks the usual _H_ suffix (copybara would otherwise rename it).
//
// Whenever the defined-state of HWY_SET_MACROS_PER_TARGET equals that of
// HWY_TARGET_TOGGLE, the guard macro is flipped. The conditional closes
// immediately, so nothing below is enclosed by it.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifndef HWY_SET_MACROS_PER_TARGET
#define HWY_SET_MACROS_PER_TARGET
#else
#undef HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET
30
31#include "hwy/detect_compiler_arch.h" // IWYU: export
32#include "hwy/detect_targets.h" // IWYU: export
33
// Drop every per-target macro left over from a previous inclusion so that the
// definitions chosen below for the current HWY_TARGET never clash with them.

// Namespace and vector geometry.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_LANES
#undef HWY_MAX_BYTES

// Capability and feature flags.
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_TUPLE
#undef HWY_MEM_OPS_MIGHT_FAULT
#undef HWY_NATIVE_FMA
48
// Tuples are available on every target except RVV, where they are disabled
// for GCC older than 14 and for any Clang (per the original note: requires
// GCC 14 or an upcoming Clang).
#if HWY_TARGET != HWY_RVV ||                                        \
    (!(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) && \
     !(HWY_COMPILER_CLANG))
#define HWY_HAVE_TUPLE 1
#else
#define HWY_HAVE_TUPLE 0
#endif
57
// Internal: upper bound used when clamping/validating N for Simd<>.
// The scalar target is limited to 1; all other targets use 65536.
#undef HWY_MAX_N
#if HWY_TARGET != HWY_SCALAR
#define HWY_MAX_N 65536
#else
#define HWY_MAX_N 1
#endif
65
// Internal: upper bound used when clamping kPow2 for Simd<>.
// On HWY_RVV, LMUL <= 8, i.e. kPow2 <= 3. Other targets also need 3 so that
// e.g. Rebind<uint64_t, Simd<uint8_t, 1, 0>> d; (whose kPow2 is 3 as well) is
// accepted. Those targets do not actually support multiple vectors, however,
// so Lanes(d) must not exceed Lanes(ScalableTag<T>()).
#undef HWY_MAX_POW2
#define HWY_MAX_POW2 3

// User-visible: loose lower bound guaranteeing that
// HWY_MAX_BYTES >> (-HWY_MIN_POW2) <= 1. Handy for terminating compile-time
// recursion.
#undef HWY_MIN_POW2
#if HWY_TARGET != HWY_RVV
// Vectors are smaller on these targets, so a tighter bound can be used, which
// potentially saves compile time.
#define HWY_MIN_POW2 -8
#else
#define HWY_MIN_POW2 -16
#endif  // HWY_TARGET != HWY_RVV
84
// HWY_TARGET_STR is redefined further below for the current target; the
// HWY_TARGET_STR_* macros in this section are the building blocks for it.
#undef HWY_TARGET_STR

// --- x86: optional feature groups, each removable via a HWY_DISABLE_* macro.

#ifndef HWY_DISABLE_PCLMUL_AES
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#else
#define HWY_TARGET_STR_PCLMUL_AES ""
#endif

#ifndef HWY_DISABLE_BMI2_FMA
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#else
#define HWY_TARGET_STR_BMI2_FMA ""
#endif

#ifndef HWY_DISABLE_F16C
#define HWY_TARGET_STR_F16C ",f16c"
#else
#define HWY_TARGET_STR_F16C ""
#endif

// --- x86: cumulative feature lists. Each list starts with the previous
// target's list, because previous targets are the half-vectors of the next.

#define HWY_TARGET_STR_SSE2 "sse2"

#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES

#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C

#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512cd,avx512vl,avx512dq,avx512bw"

#define HWY_TARGET_STR_AVX3_DL                                         \
  HWY_TARGET_STR_AVX3                                                  \
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg," \
  "avx512vpopcntdq,gfni"

// avx512bf16 is only appended when the compiler is not clang-cl, is GCC >= 10
// or Clang >= 9, and HWY_AVX3_DISABLE_AVX512BF16 is not defined.
#if HWY_COMPILER_CLANGCL || defined(HWY_AVX3_DISABLE_AVX512BF16) || \
    (HWY_COMPILER_GCC_ACTUAL < 1000 && HWY_COMPILER_CLANG < 900)
#define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL
#else
#define HWY_TARGET_STR_AVX3_ZEN4 HWY_TARGET_STR_AVX3_DL ",avx512bf16"
#endif

#define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_ZEN4 ",avx512fp16"

// --- POWER

#ifndef HWY_DISABLE_PPC8_CRYPTO
#define HWY_TARGET_STR_PPC8_CRYPTO ",crypto"
#else
#define HWY_TARGET_STR_PPC8_CRYPTO ""
#endif

#define HWY_TARGET_STR_PPC8 \
  "altivec,vsx,power8-vector" HWY_TARGET_STR_PPC8_CRYPTO
#define HWY_TARGET_STR_PPC9 HWY_TARGET_STR_PPC8 ",power9-vector"

#if !HWY_COMPILER_CLANG
// Non-Clang compilers: see #1707 and
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102059#c35.
// With a PPC 8 or 9 baseline, inlining functions such as PreventElision into
// PPC10 code fails: PPC10 defaults to no-htm and is therefore "worse" than the
// baseline, which has htm. Pragma target cannot be applied to functions
// outside HWY_NAMESPACE such as those in base.h. Users could pass -mno-htm
// globally, but the mismatch can also be avoided at the library level by
// claiming that PPC10 still has HTM. This seems to be safe because HTM uses
// builtins rather than modifying codegen, see
// https://gcc.gnu.org/legacy-ml/gcc-patches/2013-07/msg00167.html.
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",cpu=power10,htm"
#else
#define HWY_TARGET_STR_PPC10 HWY_TARGET_STR_PPC9 ",power10-vector"
#endif

// --- IBM Z

#define HWY_TARGET_STR_Z14 "arch=z14"
#define HWY_TARGET_STR_Z15 "arch=z15"
158
// The include guard near the top of this file closes before this point, so
// HWY_TARGET_STR and the other per-target macros are redefined on each
// include, governed by the current HWY_TARGET.
//
// Every target section below defines the same set of macros:
//   HWY_NAMESPACE, HWY_TARGET_STR (left undefined on some targets),
//   HWY_ALIGN, HWY_MAX_BYTES, HWY_LANES(T), HWY_CAP_GE256, HWY_CAP_GE512,
//   HWY_HAVE_SCALABLE, HWY_HAVE_INTEGER64, HWY_HAVE_FLOAT16, HWY_HAVE_FLOAT64,
//   HWY_NATIVE_FMA, HWY_MEM_OPS_MIGHT_FAULT.

//-----------------------------------------------------------------------------
// SSE2
#if HWY_TARGET == HWY_SSE2

#define HWY_NAMESPACE N_SSE2
#define HWY_TARGET_STR HWY_TARGET_STR_SSE2

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// SSSE3
#elif HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1

// FMA is part of the BMI2/FMA group, which HWY_DISABLE_BMI2_FMA removes.
#ifndef HWY_DISABLE_BMI2_FMA
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// AVX3, AVX3_DL, AVX3_ZEN4, AVX3_SPR
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL || \
    HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR

// Only the namespace and target string differ between these targets.
#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_DL

#elif HWY_TARGET == HWY_AVX3_ZEN4

#define HWY_NAMESPACE N_AVX3_ZEN4
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_ZEN4

#elif HWY_TARGET == HWY_AVX3_SPR

#define HWY_NAMESPACE N_AVX3_SPR
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3_SPR

#else
#error "Logic error"
#endif  // HWY_TARGET

#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1

// f16 vectors: only for AVX3_SPR built with GCC when a scalar f16 type exists.
// TODO: enable F16 for AVX3_SPR target with Clang once compilation issues are
// fixed
#if HWY_TARGET == HWY_AVX3_SPR && HWY_COMPILER_GCC_ACTUAL && \
    HWY_HAVE_SCALAR_F16_TYPE
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 1
#define HWY_MEM_OPS_MIGHT_FAULT 0

//-----------------------------------------------------------------------------
// PPC8, PPC9, PPC10
#elif HWY_TARGET == HWY_PPC8 || HWY_TARGET == HWY_PPC9 || \
    HWY_TARGET == HWY_PPC10

// Only the namespace and target string differ between these targets.
#if HWY_TARGET == HWY_PPC8

#define HWY_NAMESPACE N_PPC8
#define HWY_TARGET_STR HWY_TARGET_STR_PPC8

#elif HWY_TARGET == HWY_PPC9

#define HWY_NAMESPACE N_PPC9
#define HWY_TARGET_STR HWY_TARGET_STR_PPC9

#elif HWY_TARGET == HWY_PPC10

#define HWY_NAMESPACE N_PPC10
#define HWY_TARGET_STR HWY_TARGET_STR_PPC10

#else
#error "Logic error"
#endif  // HWY_TARGET

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// Z14, Z15
#elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15

// Only the namespace and target string differ between these targets.
#if HWY_TARGET == HWY_Z14

#define HWY_NAMESPACE N_Z14
#define HWY_TARGET_STR HWY_TARGET_STR_Z14

#elif HWY_TARGET == HWY_Z15

#define HWY_NAMESPACE N_Z15
#define HWY_TARGET_STR HWY_TARGET_STR_Z15

#else
#error "Logic error"
#endif  // HWY_TARGET

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// NEON, NEON_WITHOUT_AES
#elif HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_NEON_WITHOUT_AES

#if HWY_TARGET != HWY_NEON_WITHOUT_AES
#define HWY_NAMESPACE N_NEON
#else
#define HWY_NAMESPACE N_NEON_WITHOUT_AES
#endif

// HWY_TARGET_STR: with runtime dispatch, pragmas can be used instead of the
// -march compiler flag. In all other cases it remains undefined (no pragma).
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_ARCH_ARM_V7
// __attribute__((target("+neon-vfpv4"))) was introduced in GCC 8; for GCC < 8
// HWY_TARGET_STR is not defined.
#if HWY_COMPILER_GCC_ACTUAL >= 800
#define HWY_TARGET_STR "+neon-vfpv4"
#endif  // HWY_COMPILER_GCC_ACTUAL
#elif HWY_TARGET != HWY_NEON_WITHOUT_AES
// Not Armv7: only the target with AES requests "+crypto".
#define HWY_TARGET_STR "+crypto"
#endif  // HWY_ARCH_ARM_V7
#endif  // HWY_HAVE_RUNTIME_DISPATCH

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif

#if defined(__ARM_FEATURE_FMA) || defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// SVE, SVE2, SVE_256, SVE2_128
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128

// Namespace, maximum vector size and scalability depend on the exact target.
#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#define HWY_MAX_BYTES 256
#define HWY_HAVE_SCALABLE 1
#elif HWY_TARGET == HWY_SVE_256
#define HWY_NAMESPACE N_SVE_256
#define HWY_MAX_BYTES 32
#define HWY_HAVE_SCALABLE 0
#elif HWY_TARGET == HWY_SVE2_128
#define HWY_NAMESPACE N_SVE2_128
#define HWY_MAX_BYTES 16
#define HWY_HAVE_SCALABLE 0
#else
#define HWY_NAMESPACE N_SVE
#define HWY_MAX_BYTES 256
#define HWY_HAVE_SCALABLE 1
#endif

// HWY_TARGET_STR: with runtime dispatch, pragmas can be used instead of the
// -march compiler flag; otherwise it remains undefined.
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_TARGET != HWY_SVE2 && HWY_TARGET != HWY_SVE2_128
#define HWY_TARGET_STR "+sve"
#elif defined(__ARM_FEATURE_SVE2_AES) || (HWY_BASELINE_SVE2 == 0)
// Either static dispatch with -march=armv8-a+sve2+aes, or there is no SVE2
// baseline and hence dynamic dispatch, which checks for AES support at
// runtime.
#define HWY_TARGET_STR "+sve2-aes"
#else
// SVE2 without AES.
#define HWY_TARGET_STR "+sve2"
#endif
#endif  // HWY_HAVE_RUNTIME_DISPATCH

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

// Deriving the lane count from HWY_MAX_BYTES makes MaxLanes() the tightest
// possible upper bound, which reduces overallocation.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 1
#define HWY_MEM_OPS_MIGHT_FAULT 0

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_NAMESPACE N_WASM
#define HWY_TARGET_STR "simd128"

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// WASM_EMU256
#elif HWY_TARGET == HWY_WASM_EMU256

#define HWY_NAMESPACE N_WASM_EMU256
#define HWY_TARGET_STR "simd128"

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 0
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so HWY_ALIGN is empty.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// Equals HWY_MAX_BYTES divided by the maximum LMUL of 8, because MaxLanes
// includes the actual LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1

#if !HWY_RVV_HAVE_F16_VEC
#define HWY_HAVE_FLOAT16 0
#else
#define HWY_HAVE_FLOAT16 1
#endif

#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 1
#define HWY_MEM_OPS_MIGHT_FAULT 0

//-----------------------------------------------------------------------------
// EMU128
#elif HWY_TARGET == HWY_EMU128

#define HWY_NAMESPACE N_EMU128

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 1

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_NATIVE_FMA 0
#define HWY_MEM_OPS_MIGHT_FAULT 0

#else
// Unknown target: only a diagnostic message is emitted; none of the per-target
// macros are defined in this case.
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET
603
604//-----------------------------------------------------------------------------
605
// Consistency check: a target that reports f16 vector support
// (HWY_HAVE_FLOAT16) requires that base.h also uses a built-in type for f16
// scalars (HWY_HAVE_SCALAR_F16_TYPE). Anything else is a configuration bug.
#if HWY_HAVE_FLOAT16
#if !HWY_HAVE_SCALAR_F16_TYPE
#error "Logic error: f16 vectors but no scalars"
#endif
#endif  // HWY_HAVE_FLOAT16
611
// Under ASan or MSan, memory ops will still fault, so force the flag to 1
// regardless of what the target section chose.
#if HWY_IS_MSAN || HWY_IS_ASAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif  // HWY_IS_MSAN || HWY_IS_ASAN
617
// HWY_BEFORE_NAMESPACE(): must be invoked at file scope, before any namespace
// (a Clang <9 requirement). The trailing static_assert exists solely so that
// the invocation has to be followed by a semicolon.
#undef HWY_BEFORE_NAMESPACE
#ifndef HWY_TARGET_STR
// No target string: emit nothing but the static_assert, which avoids a
// compiler warning.
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#else
// Target string available: push the target attributes first.
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#endif  // HWY_TARGET_STR
629
// HWY_AFTER_NAMESPACE(): must be invoked only after all namespaces have been
// closed (a Clang <9 requirement). The trailing static_assert exists solely so
// that the invocation has to be followed by a semicolon.
#undef HWY_AFTER_NAMESPACE
#ifndef HWY_TARGET_STR
// No target string: emit nothing but the static_assert, which avoids a
// compiler warning.
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#else
// Target string available: pop the attributes again.
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#endif  // HWY_TARGET_STR
641
642#undef HWY_ATTR
643#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
644#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
645#else
646#define HWY_ATTR
647#endif