Go to the documentation of this file.
16#ifndef HIGHWAY_HWY_DETECT_TARGETS_H_
17#define HIGHWAY_HWY_DETECT_TARGETS_H_
// x86 targets. Each target occupies exactly one bit of a 64-bit mask.
// HWY_STATIC_TARGET isolates the lowest set bit of the enabled baseline, so a
// smaller bit index wins when several targets are available.
// Bits 5, 10 and 13 are not assigned to any target here.
#define HWY_AVX3_SPR (1LL << 4)
#define HWY_AVX3_ZEN4 (1LL << 6)
#define HWY_AVX3_DL (1LL << 7)
#define HWY_AVX3 (1LL << 8)
#define HWY_AVX2 (1LL << 9)
#define HWY_SSE4 (1LL << 11)
#define HWY_SSSE3 (1LL << 12)
#define HWY_SSE2 (1LL << 14)
// Index of the highest bit assigned to an x86 target (that of HWY_SSE2).
#define HWY_HIGHEST_TARGET_BIT_X86 14
// Arm targets: one mask bit each, occupying bits 24..29.
#define HWY_SVE2_128 (1LL << 24)
#define HWY_SVE_256 (1LL << 25)
#define HWY_SVE2 (1LL << 26)
#define HWY_SVE (1LL << 27)
#define HWY_NEON (1LL << 28)
#define HWY_NEON_WITHOUT_AES (1LL << 29)
// Index of the highest bit assigned to an Arm target (HWY_NEON_WITHOUT_AES).
#define HWY_HIGHEST_TARGET_BIT_ARM 29
// RISC-V vector target. The highest-bit index for this group (38) is one above
// the bit used by HWY_RVV (37); no target visible here occupies bit 38.
#define HWY_RVV (1LL << 37)
#define HWY_HIGHEST_TARGET_BIT_RVV 38
// POWER (PPC8/9/10) and Z (Z14/Z15) targets share one group: bits 47..51.
#define HWY_PPC10 (1LL << 47)
#define HWY_PPC9 (1LL << 48)
#define HWY_PPC8 (1LL << 49)
#define HWY_Z15 (1LL << 50)
#define HWY_Z14 (1LL << 51)
// Index of the highest bit assigned in this group (HWY_Z14).
#define HWY_HIGHEST_TARGET_BIT_PPC 51
// WebAssembly targets. The highest-bit index for this group (60) is one above
// the bit used by HWY_WASM (59); no target visible here occupies bit 60.
#define HWY_WASM_EMU256 (1LL << 58)
#define HWY_WASM (1LL << 59)
#define HWY_HIGHEST_TARGET_BIT_WASM 60
// Fallback targets occupy the two highest bits in use; HWY_BASELINE_SCALAR
// resolves to one of them. Bit 62 keeps the 64-bit signed mask non-negative.
#define HWY_EMU128 (1LL << 61)
#define HWY_SCALAR (1LL << 62)
// Index of the highest bit assigned to a fallback target (HWY_SCALAR).
#define HWY_HIGHEST_TARGET_BIT_SCALAR 62
// Mask of targets to exclude; HWY_ENABLED() clears these bits. Defaults to
// "nothing disabled" unless the build already defined HWY_DISABLED_TARGETS.
#ifndef HWY_DISABLED_TARGETS
#define HWY_DISABLED_TARGETS 0
#endif
// On x86 with Clang older than 7.00, mark HWY_SSE4 and every lower-numbered
// target bit as broken; otherwise nothing is broken for this reason.
#if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
#define HWY_BROKEN_CLANG6 (HWY_SSE4 | (HWY_SSE4 - 1))
// Emit a build message unless the user already opted into scalar-only.
#if !defined(HWY_COMPILE_ONLY_SCALAR)
#pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.")
#endif
#else
#define HWY_BROKEN_CLANG6 0
#endif
// HWY_BROKEN_32BIT is either "HWY_AVX2 plus every lower-numbered target bit"
// or 0.
// NOTE(review): the #if / #else / #endif lines that select between these two
// definitions are absent from this listing (numbers 159 and 161 are skipped),
// so the governing condition cannot be determined here — recover it from the
// original header.
160#define HWY_BROKEN_32BIT (HWY_AVX2 | (HWY_AVX2 - 1))
162#define HWY_BROKEN_32BIT 0
// When HWY_COMPILER_MSVC is nonzero, mark HWY_AVX3 and every lower-numbered
// target bit as broken; otherwise nothing is broken for this reason.
#if HWY_COMPILER_MSVC != 0
#define HWY_BROKEN_MSVC (HWY_AVX3 | (HWY_AVX3 - 1))
#else
#define HWY_BROKEN_MSVC 0
#endif
// HWY_AVX3_DL and HWY_AVX3_ZEN4 are treated as broken on GCC older than 8.01
// and on ICC older than 2021.
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 801) || \
    (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
#define HWY_BROKEN_AVX3_DL_ZEN4 (HWY_AVX3_DL | HWY_AVX3_ZEN4)
#else
#define HWY_BROKEN_AVX3_DL_ZEN4 0
#endif

// HWY_AVX3_SPR is treated as broken on Clang older than 14.00, GCC older than
// 12.00 and ICC older than 2021.
#if (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 1400) ||         \
    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1200) ||    \
    (HWY_COMPILER_ICC && HWY_COMPILER_ICC < 2021)
#define HWY_BROKEN_AVX3_SPR (HWY_AVX3_SPR)
#else
#define HWY_BROKEN_AVX3_SPR 0
#endif
// Both NEON targets are treated as broken on big-endian Armv7.
#if HWY_ARCH_ARM_V7 && HWY_IS_BIG_ENDIAN
#define HWY_BROKEN_ARM7_BIG_ENDIAN (HWY_NEON | HWY_NEON_WITHOUT_AES)
#else
#define HWY_BROKEN_ARM7_BIG_ENDIAN 0
#endif

// Both NEON targets are treated as broken on Armv7 with the 'A' profile when
// neither __ARM_VFPV4__ is defined nor (bit 1 of __ARM_NEON_FP is set and
// __ARM_FEATURE_FMA equals 1).
#if HWY_ARCH_ARM_V7 && (__ARM_ARCH_PROFILE == 'A') && \
    !defined(__ARM_VFPV4__) &&                        \
    !((__ARM_NEON_FP & 0x2) && (__ARM_FEATURE_FMA == 1))
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 (HWY_NEON | HWY_NEON_WITHOUT_AES)
#else
#define HWY_BROKEN_ARM7_WITHOUT_VFP4 0
#endif
// All four SVE targets are treated as broken on Clang older than 11.00 and on
// GCC older than 10.00.
#if (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
#define HWY_BROKEN_SVE (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
#else
#define HWY_BROKEN_SVE 0
#endif
// HWY_PPC10 is treated as broken in two situations:
//  1. any GCC older than 11.00;
//  2. big-endian PPC built with Clang older than 16.0.1 (HWY_COMPILER3_CLANG),
//     GCC 12.00..12.03, or GCC 13.00..13.01.
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1100)
#define HWY_BROKEN_PPC10 (HWY_PPC10)
#elif HWY_ARCH_PPC && HWY_IS_BIG_ENDIAN &&                                   \
    ((HWY_COMPILER3_CLANG && HWY_COMPILER3_CLANG < 160001) ||                \
     (HWY_COMPILER_GCC_ACTUAL >= 1200 && HWY_COMPILER_GCC_ACTUAL <= 1203) || \
     (HWY_COMPILER_GCC_ACTUAL >= 1300 && HWY_COMPILER_GCC_ACTUAL <= 1301))
#define HWY_BROKEN_PPC10 (HWY_PPC10)
#else
#define HWY_BROKEN_PPC10 0
#endif
// Union of every per-reason HWY_BROKEN_* mask. HWY_ENABLED() clears these bits.
// The build may predefine HWY_BROKEN_TARGETS to override the computed value.
#ifndef HWY_BROKEN_TARGETS
#define HWY_BROKEN_TARGETS                                     \
    (HWY_BROKEN_CLANG6 | HWY_BROKEN_32BIT | HWY_BROKEN_MSVC |  \
     HWY_BROKEN_AVX3_DL_ZEN4 | HWY_BROKEN_AVX3_SPR |           \
     HWY_BROKEN_ARM7_BIG_ENDIAN | HWY_BROKEN_ARM7_WITHOUT_VFP4 | \
     HWY_BROKEN_SVE | HWY_BROKEN_PPC10)
#endif
// Filters a target mask: keeps only the bits of `targets` that are neither in
// HWY_DISABLED_TARGETS nor in HWY_BROKEN_TARGETS.
#define HWY_ENABLED(targets) \
    ((targets) & ~((HWY_DISABLED_TARGETS) | (HWY_BROKEN_TARGETS)))
// HWY_BROKEN_EMU128 (unless predefined) is 1 for GCC older than 14.00 or when
// HWY_NO_LIBCXX is defined, otherwise 0.
#if !defined(HWY_BROKEN_EMU128)
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400) || \
    defined(HWY_NO_LIBCXX)
#define HWY_BROKEN_EMU128 1
#else
#define HWY_BROKEN_EMU128 0
#endif
#endif

// Fallback baseline: HWY_SCALAR when scalar-only was requested or EMU128 is
// broken, otherwise HWY_EMU128.
#if defined(HWY_COMPILE_ONLY_SCALAR) || HWY_BROKEN_EMU128
#define HWY_BASELINE_SCALAR HWY_SCALAR
#else
#define HWY_BASELINE_SCALAR HWY_EMU128
#endif
// WASM baseline: requires HWY_ARCH_WASM and __wasm_simd128__. Defining
// HWY_WANT_WASM2 selects HWY_WASM_EMU256 instead of HWY_WASM.
#if HWY_ARCH_WASM && defined(__wasm_simd128__)
#if defined(HWY_WANT_WASM2)
#define HWY_BASELINE_WASM HWY_WASM_EMU256
#else
#define HWY_BASELINE_WASM HWY_WASM
#endif
#else
#define HWY_BASELINE_WASM 0
#endif
// PPC8 baseline: needs HWY_ARCH_PPC, HWY_COMPILER_GCC, __ALTIVEC__, __VSX__,
// __POWER8_VECTOR__ and either __CRYPTO__ or HWY_DISABLE_PPC8_CRYPTO.
#if HWY_ARCH_PPC && HWY_COMPILER_GCC && defined(__ALTIVEC__) && \
    defined(__VSX__) && defined(__POWER8_VECTOR__) &&           \
    (defined(__CRYPTO__) || defined(HWY_DISABLE_PPC8_CRYPTO))
#define HWY_BASELINE_PPC8 HWY_PPC8
#else
#define HWY_BASELINE_PPC8 0
#endif

// PPC9 baseline: builds on the PPC8 baseline and needs __POWER9_VECTOR__.
#if HWY_BASELINE_PPC8 != 0 && defined(__POWER9_VECTOR__)
#define HWY_BASELINE_PPC9 HWY_PPC9
#else
#define HWY_BASELINE_PPC9 0
#endif

// PPC10 baseline: builds on the PPC9 baseline and needs _ARCH_PWR10 or
// __POWER10_VECTOR__.
#if HWY_BASELINE_PPC9 != 0 && \
    (defined(_ARCH_PWR10) || defined(__POWER10_VECTOR__))
#define HWY_BASELINE_PPC10 HWY_PPC10
#else
#define HWY_BASELINE_PPC10 0
#endif
// Z14 baseline: needs HWY_ARCH_S390X, __VEC__ and __ARCH__ >= 12.
#if HWY_ARCH_S390X && defined(__VEC__) && defined(__ARCH__) && __ARCH__ >= 12
#define HWY_BASELINE_Z14 HWY_Z14
#else
#define HWY_BASELINE_Z14 0
#endif

// Z15 baseline: builds on the Z14 baseline and needs __ARCH__ >= 13.
#if HWY_BASELINE_Z14 && __ARCH__ >= 13
#define HWY_BASELINE_Z15 HWY_Z15
#else
#define HWY_BASELINE_Z15 0
#endif
// Arm baselines default to 0 and are redefined below when the compiler
// advertises the corresponding feature macro.
// NOTE(review): listing numbers 336-338 are absent between these defaults and
// the first #if. If an enclosing architecture guard lived there, it is not
// reproduced here — confirm against the original header.
#define HWY_BASELINE_SVE2 0
#define HWY_BASELINE_SVE 0
#define HWY_BASELINE_NEON 0

// SVE2: the fixed 128-bit variant when __ARM_FEATURE_SVE_BITS == 128,
// otherwise the generic HWY_SVE2.
#if defined(__ARM_FEATURE_SVE2)
#undef HWY_BASELINE_SVE2
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 128
#define HWY_BASELINE_SVE2 HWY_SVE2_128
#else
#define HWY_BASELINE_SVE2 HWY_SVE2
#endif
#endif

// SVE: the fixed 256-bit variant when __ARM_FEATURE_SVE_BITS == 256,
// otherwise the generic HWY_SVE.
#if defined(__ARM_FEATURE_SVE)
#undef HWY_BASELINE_SVE
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
#define HWY_BASELINE_SVE HWY_SVE_256
#else
#define HWY_BASELINE_SVE HWY_SVE
#endif
#endif

// NEON: with __ARM_FEATURE_AES both NEON targets are baseline; without it
// only HWY_NEON_WITHOUT_AES is.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#undef HWY_BASELINE_NEON
#if defined(__ARM_FEATURE_AES)
#define HWY_BASELINE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
#else
#define HWY_BASELINE_NEON (HWY_NEON_WITHOUT_AES)
#endif
#endif
// HWY_CHECK_* flags (each 0 or 1); the HWY_BASELINE_SSE2/SSSE3/SSE4 and
// HWY_BASELINE_AVX2 conditions later in this file test them.
// NOTE(review): every definition below is one arm of a conditional whose
// #if / #elif / #else / #endif lines are missing from this listing (the
// numbering skips 380, 382, 384-385, 387, 389-393, 396, 399-402, 406-409 and
// 411). As shown, the macros are redefined with conflicting values; the
// selecting conditions must be recovered from the original header.
381#define HWY_CHECK_SSE2 1
383#define HWY_CHECK_SSE2 0
386#define HWY_CHECK_SSE2 1
388#define HWY_CHECK_SSE2 0
394#define HWY_CHECK_SSSE3 1
395#define HWY_CHECK_SSE4 1
397#define HWY_CHECK_SSSE3 0
398#define HWY_CHECK_SSE4 0
403#define HWY_CHECK_PCLMUL_AES 1
404#define HWY_CHECK_BMI2_FMA 1
405#define HWY_CHECK_F16C 1
410#define HWY_CHECK_SSE2 1
412#define HWY_CHECK_SSE2 0
// Feature checks driven by compiler-predefined macros; each flag is 0 or 1.
#if defined(__SSSE3__)
#define HWY_CHECK_SSSE3 1
#else
#define HWY_CHECK_SSSE3 0
#endif

// SSE4 requires both SSE4.1 and SSE4.2.
#if defined(__SSE4_1__) && defined(__SSE4_2__)
#define HWY_CHECK_SSE4 1
#else
#define HWY_CHECK_SSE4 0
#endif

// For the next three checks, defining the matching HWY_DISABLE_* macro makes
// the check pass even when the compiler feature macros are absent.
#if defined(HWY_DISABLE_PCLMUL_AES) || (defined(__PCLMUL__) && defined(__AES__))
#define HWY_CHECK_PCLMUL_AES 1
#else
#define HWY_CHECK_PCLMUL_AES 0
#endif

#if defined(HWY_DISABLE_BMI2_FMA) || (defined(__BMI2__) && defined(__FMA__))
#define HWY_CHECK_BMI2_FMA 1
#else
#define HWY_CHECK_BMI2_FMA 0
#endif

#if defined(HWY_DISABLE_F16C) || defined(__F16C__)
#define HWY_CHECK_F16C 1
#else
#define HWY_CHECK_F16C 0
#endif
// x86 SSE baselines. Each needs HWY_ARCH_X86 plus either the matching
// HWY_WANT_* override or a passing HWY_CHECK_* flag.
#if HWY_ARCH_X86 && (HWY_WANT_SSE2 || HWY_CHECK_SSE2)
#define HWY_BASELINE_SSE2 HWY_SSE2
#else
#define HWY_BASELINE_SSE2 0
#endif

#if HWY_ARCH_X86 && (HWY_WANT_SSSE3 || HWY_CHECK_SSSE3)
#define HWY_BASELINE_SSSE3 HWY_SSSE3
#else
#define HWY_BASELINE_SSSE3 0
#endif

// Without HWY_WANT_SSE4, SSE4 additionally requires the PCLMUL/AES check.
#if HWY_ARCH_X86 && (HWY_WANT_SSE4 || (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
#define HWY_BASELINE_SSE4 HWY_SSE4
#else
#define HWY_BASELINE_SSE4 0
#endif
// HWY_BASELINE_AVX2: HWY_AVX2 when the condition holds, otherwise 0. The
// visible part of the condition requires the SSE4 baseline plus the BMI2/FMA
// and F16C checks.
// NOTE(review): the condition ends in a line continuation, but its final line
// (listing number 467) is missing, as are the #else / #endif lines (469, 471).
// The last operand cannot be determined from this listing — recover it from
// the original header.
466#if HWY_BASELINE_SSE4 != 0 && HWY_CHECK_BMI2_FMA && HWY_CHECK_F16C && \
468#define HWY_BASELINE_AVX2 HWY_AVX2
470#define HWY_BASELINE_AVX2 0
// AVX3 baseline: builds on the AVX2 baseline and needs AVX-512 F, BW, DQ, VL.
#if HWY_BASELINE_AVX2 != 0 && defined(__AVX512F__) && defined(__AVX512BW__) && \
    defined(__AVX512DQ__) && defined(__AVX512VL__)
#define HWY_BASELINE_AVX3 HWY_AVX3
#else
#define HWY_BASELINE_AVX3 0
#endif

// AVX3_DL baseline: builds on AVX3 and needs VNNI, VAES, VPCLMULQDQ, VBMI,
// VBMI2, VPOPCNTDQ and BITALG.
#if HWY_BASELINE_AVX3 != 0 && defined(__AVX512VNNI__) && defined(__VAES__) && \
    defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) &&                     \
    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) &&               \
    defined(__AVX512BITALG__)
#define HWY_BASELINE_AVX3_DL HWY_AVX3_DL
#else
#define HWY_BASELINE_AVX3_DL 0
#endif

// AVX3_ZEN4 is only a baseline on explicit request (HWY_WANT_AVX3_ZEN4) and
// when AVX3_DL is already a baseline.
#if defined(HWY_WANT_AVX3_ZEN4) && HWY_BASELINE_AVX3_DL != 0
#define HWY_BASELINE_AVX3_ZEN4 HWY_AVX3_ZEN4
#else
#define HWY_BASELINE_AVX3_ZEN4 0
#endif

// AVX3_SPR baseline: builds on AVX3_DL and needs AVX512BF16 and AVX512FP16.
#if HWY_BASELINE_AVX3_DL != 0 && defined(__AVX512BF16__) && \
    defined(__AVX512FP16__)
#define HWY_BASELINE_AVX3_SPR HWY_AVX3_SPR
#else
#define HWY_BASELINE_AVX3_SPR 0
#endif
// RVV baseline: needs HWY_ARCH_RISCV and __riscv_v_intrinsic >= 11000.
#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
    __riscv_v_intrinsic >= 11000
#define HWY_BASELINE_RVV HWY_RVV
#else
#define HWY_BASELINE_RVV 0
#endif
// Union of every per-target baseline mask. The build may predefine
// HWY_BASELINE_TARGETS to override the computed value.
// NOTE(review): the listing dropped the final operand after the trailing '|'
// (number 524) and the closing #endif. HWY_BASELINE_RVV is the only
// HWY_BASELINE_* macro defined in this file that was missing from the union,
// so it is restored as the last operand — confirm against the original header.
#ifndef HWY_BASELINE_TARGETS
#define HWY_BASELINE_TARGETS                                                 \
    (HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 |           \
     HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10 | HWY_BASELINE_Z14 |             \
     HWY_BASELINE_Z15 | HWY_BASELINE_SVE2 | HWY_BASELINE_SVE |               \
     HWY_BASELINE_NEON | HWY_BASELINE_SSE2 | HWY_BASELINE_SSSE3 |            \
     HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 | HWY_BASELINE_AVX3 |             \
     HWY_BASELINE_AVX3_DL | HWY_BASELINE_AVX3_ZEN4 | HWY_BASELINE_AVX3_SPR | \
     HWY_BASELINE_RVV)
#endif
// Baseline targets that survive HWY_ENABLED(), i.e. are neither disabled nor
// broken.
#define HWY_ENABLED_BASELINE HWY_ENABLED(HWY_BASELINE_TARGETS)
531#if HWY_ENABLED_BASELINE == 0
532#error "At least one baseline target must be defined and enabled"
// Isolates the lowest set bit of the enabled baseline (x & -x): the single
// target used for static dispatch.
#define HWY_STATIC_TARGET (HWY_ENABLED_BASELINE & -HWY_ENABLED_BASELINE)

// HWY_TARGET starts out equal to the static target.
#define HWY_TARGET HWY_STATIC_TARGET
// The three HWY_COMPILE_ONLY_* switches are mutually exclusive: abort the build
// when more than one of them is defined.
#if 1 < (defined(HWY_COMPILE_ONLY_SCALAR) + defined(HWY_COMPILE_ONLY_EMU128) + \
         defined(HWY_COMPILE_ONLY_STATIC))
#error "Can only define one of HWY_COMPILE_ONLY_{SCALAR|EMU128|STATIC} - bug?"
#endif
// HWY_HAVE_RUNTIME_DISPATCH is 1 or 0. The visible #elif sets it to 1 for
// Arm/PPC/S390X/RISC-V when all of the following hold: the compiler is GCC, or
// Clang >= 16.00 (on Arm only with HWY_ENABLE_CLANG_ARM_DISPATCH); HWY_OS_LINUX
// is set; and TOOLCHAIN_MISS_SYS_AUXV_H is not defined.
// NOTE(review): the opening #if that guards the first definition is missing
// from this listing (numbers 552-555), as are the #else / #endif lines (566,
// 568), so the first branch's condition cannot be determined here.
556#define HWY_HAVE_RUNTIME_DISPATCH 1
560#elif (HWY_ARCH_ARM || HWY_ARCH_PPC || HWY_ARCH_S390X || HWY_ARCH_RISCV) && \
561 (HWY_COMPILER_GCC_ACTUAL || \
562 (HWY_COMPILER_CLANG >= 1600 && \
563 (!HWY_ARCH_ARM || defined(HWY_ENABLE_CLANG_ARM_DISPATCH)))) && \
564 HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
565#define HWY_HAVE_RUNTIME_DISPATCH 1
567#define HWY_HAVE_RUNTIME_DISPATCH 0
// AVX3_DL is attainable only on request (HWY_WANT_AVX3_DL) or when it is
// already part of the baseline.
#if defined(HWY_WANT_AVX3_DL) || (HWY_BASELINE_TARGETS & HWY_AVX3_DL)
#define HWY_ATTAINABLE_AVX3_DL (HWY_AVX3_DL)
#else
#define HWY_ATTAINABLE_AVX3_DL 0
#endif
// HWY_ATTAINABLE_NEON: both NEON targets on AArch64 with runtime dispatch;
// otherwise either the NEON baseline or 0.
// NOTE(review): three definitions are visible, but the #elif condition that
// selects HWY_BASELINE_NEON (listing number 581) and the #else / #endif lines
// (583, 585) are missing, so when the second branch applies cannot be
// determined from this listing.
579#if HWY_ARCH_ARM_A64 && HWY_HAVE_RUNTIME_DISPATCH
580#define HWY_ATTAINABLE_NEON (HWY_NEON | HWY_NEON_WITHOUT_AES)
582#define HWY_ATTAINABLE_NEON (HWY_BASELINE_NEON)
584#define HWY_ATTAINABLE_NEON 0
// On AArch64, both SVE variants are attainable with runtime dispatch or when
// either one is already an enabled baseline.
#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
                         (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
#define HWY_ATTAINABLE_SVE (HWY_SVE | HWY_SVE_256)
#else
#define HWY_ATTAINABLE_SVE 0
#endif

// Same rule for the two SVE2 variants.
#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
                         (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
#define HWY_ATTAINABLE_SVE2 (HWY_SVE2 | HWY_SVE2_128)
#else
#define HWY_ATTAINABLE_SVE2 0
#endif
// PPC targets are attainable with __ALTIVEC__ when the compiler is not Clang
// or PPC8 is already a baseline.
#if HWY_ARCH_PPC && defined(__ALTIVEC__) && \
    (!HWY_COMPILER_CLANG || HWY_BASELINE_PPC8 != 0)

// When PPC9 or PPC10 is a baseline, define HWY_SKIP_NON_BEST_BASELINE (unless
// the build already did). That macro is tested again when HWY_TARGETS is
// chosen.
#if (HWY_BASELINE_PPC9 | HWY_BASELINE_PPC10) && \
    !defined(HWY_SKIP_NON_BEST_BASELINE)
#define HWY_SKIP_NON_BEST_BASELINE
#endif

#define HWY_ATTAINABLE_PPC (HWY_PPC8 | HWY_PPC9 | HWY_PPC10)

#else
#define HWY_ATTAINABLE_PPC 0
#endif
// Z14 and Z15 are attainable on S390X once Z14 is a baseline.
#if HWY_ARCH_S390X && HWY_BASELINE_Z14 != 0
#define HWY_ATTAINABLE_S390X (HWY_Z14 | HWY_Z15)
#else
#define HWY_ATTAINABLE_S390X 0
#endif
// HWY_ATTAINABLE_TARGETS: per-architecture mask of targets, each variant
// filtered through HWY_ENABLED() (the last variant is HWY_ENABLED_BASELINE,
// which is already filtered).
// NOTE(review): this listing shows six alternative definitions, but the
// #if / #elif / #else / #endif lines that select between them are missing
// (the numbering skips 629, 632, 638, 642, 645, 648 and 650). In addition, the
// second and third definitions end in a dangling '|' because their final
// continuation lines (636-637 and 641) were dropped. Neither the conditions
// nor the missing operands can be determined here — recover them from the
// original header.
630#define HWY_ATTAINABLE_TARGETS \
631 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_STATIC_TARGET | HWY_AVX2)
633#define HWY_ATTAINABLE_TARGETS \
634 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | \
635 HWY_AVX2 | HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL | HWY_AVX3_ZEN4 | \
639#define HWY_ATTAINABLE_TARGETS \
640 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_NEON | HWY_ATTAINABLE_SVE | \
643#define HWY_ATTAINABLE_TARGETS \
644 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_PPC)
646#define HWY_ATTAINABLE_TARGETS \
647 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_ATTAINABLE_S390X)
649#define HWY_ATTAINABLE_TARGETS (HWY_ENABLED_BASELINE)
// Chooses HWY_TARGETS, the set of targets to compile, and in the first two
// branches also overrides HWY_STATIC_TARGET:
//  1. HWY_COMPILE_ONLY_EMU128 with a working EMU128: only HWY_EMU128.
//  2. HWY_COMPILE_ONLY_SCALAR, or HWY_COMPILE_ONLY_EMU128 while EMU128 is
//     broken: only HWY_SCALAR.
//  3. HWY_COMPILE_ONLY_STATIC: only the static target.
//  4. HWY_COMPILE_ALL_ATTAINABLE or HWY_IS_TEST, unless
//     HWY_SKIP_NON_BEST_BASELINE is defined: every attainable target.
//  5. Otherwise: attainable targets whose bit is at or below the static
//     target's bit.
// NOTE(review): the listing dropped the "#else" and "#define HWY_TARGETS \"
// lines ahead of the final expression (numbers 674-680) and the closing
// #endif. They are restored here because every other branch defines
// HWY_TARGETS — confirm against the original header.
#if defined(HWY_COMPILE_ONLY_EMU128) && !HWY_BROKEN_EMU128
#undef HWY_STATIC_TARGET
#define HWY_STATIC_TARGET HWY_EMU128
#define HWY_TARGETS HWY_EMU128
#elif defined(HWY_COMPILE_ONLY_SCALAR) || \
    (defined(HWY_COMPILE_ONLY_EMU128) && HWY_BROKEN_EMU128)
#undef HWY_STATIC_TARGET
#define HWY_STATIC_TARGET HWY_SCALAR
#define HWY_TARGETS HWY_SCALAR
#elif defined(HWY_COMPILE_ONLY_STATIC)
#define HWY_TARGETS HWY_STATIC_TARGET
#elif (defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)) && \
    !defined(HWY_SKIP_NON_BEST_BASELINE)
#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
#else
#define HWY_TARGETS \
    (HWY_ATTAINABLE_TARGETS & ((HWY_STATIC_TARGET - 1LL) | HWY_STATIC_TARGET))
#endif
688#if (HWY_TARGETS & HWY_STATIC_TARGET) == 0
689#error "Logic error: best baseline should be included in dynamic targets"