// Include guard for this header.
11#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12#define EIGEN_CONFIGURE_VECTORIZATION_H
// Pick the compiler-specific spelling behind EIGEN_ALIGN_TO_BOUNDARY(n) (request n-byte
// alignment) and EIGEN_ALIGNOF(x) (query the alignment of x).
// NOTE(review): each line carries a fused listing number, and the gaps in that numbering
// (44 -> 46, 47 -> 50, 51 -> 53) suggest the #elif/#else/#endif lines of this chain were
// lost in extraction -- confirm against the upstream header before building.
36#if (defined EIGEN_CUDACC)
// EIGEN_CUDACC is defined: use __align__ / __alignof.
37 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
38 #define EIGEN_ALIGNOF(x) __alignof(x)
39#elif EIGEN_HAS_ALIGNAS
// EIGEN_HAS_ALIGNAS is nonzero: use the alignas / alignof keywords.
40 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
41 #define EIGEN_ALIGNOF(x) alignof(x)
42#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
// One of the compilers tested above: use the __attribute__((aligned(n))) syntax.
43 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
44 #define EIGEN_ALIGNOF(x) __alignof(x)
// __declspec(align(n)) variant; its guarding condition is not visible in this listing.
46 #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
47 #define EIGEN_ALIGNOF(x) __alignof(x)
// Second __attribute__((aligned(n))) variant; its guarding condition is not visible either.
50 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
51 #define EIGEN_ALIGNOF(x) __alignof(x)
// Stops compilation and asks for the missing spelling.
// NOTE(review): presumably the final #else of the chain -- confirm.
53 #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
// Choose EIGEN_IDEAL_MAX_ALIGN_BYTES and define EIGEN_MIN_ALIGN_BYTES.
// NOTE(review): listing numbers jump (58 -> 61 -> 63, 67 -> 70 -> 72), so the #else/#elif/#endif
// lines separating the alternative values below appear to be missing -- confirm upstream.
57#if defined(EIGEN_DONT_VECTORIZE)
58 #if defined(EIGEN_GPUCC)
// With EIGEN_DONT_VECTORIZE defined: 16 when EIGEN_GPUCC is defined, 0 as the alternative.
61 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
63 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
65#elif defined(__AVX512F__)
// __AVX512F__ is defined: 64 bytes.
67 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
// The conditions selecting 32 and 16 are not visible here.
70 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
72 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// Minimum alignment constant, fixed at 16.
77#define EIGEN_MIN_ALIGN_BYTES 16
// Resolve EIGEN_MAX_STATIC_ALIGN_BYTES from user switches, compiler and architecture.
// NOTE(review): gaps in the fused listing numbers indicate that #else/#endif lines (and possibly
// other lines) were dropped throughout this section -- confirm against the upstream header.

// Reject contradictory settings: EIGEN_DONT_ALIGN_STATICALLY or EIGEN_DONT_ALIGN combined with
// a positive EIGEN_MAX_STATIC_ALIGN_BYTES.
83#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
84#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
// Either "don't align" switch forces the value to 0, discarding any earlier definition first.
89#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
90 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
91 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
93 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// No value defined yet: derive one from the compiler and architecture macros.
96#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT is 1 when EIGEN_COMP_GNUC is set and none of
// the architecture macros listed in the condition is set.
106 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
107 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
// It is also 1 on ARM/ARM64 with EIGEN_COMP_GNUC_STRICT at version 4.6 or older.
108 #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
112 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
// Alternative value 0 (the separating #else is presumably missing from this listing).
114 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
// EIGEN_ARCH_WANTS_STACK_ALIGNMENT: 1 when none of the exclusions in the condition holds, 0 as
// the alternative. NOTE(review): the listing number jumps 120 -> 122 right after a trailing
// backslash, so at least one continuation line of this condition appears to be missing.
118 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
119 && !EIGEN_GCC3_OR_OLDER \
120 && !EIGEN_COMP_SUNCC \
122 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
124 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
// Stack alignment wanted: use the ideal alignment; the alternative value is 0.
127 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
128 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
130 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// Lower the static limit to a user-defined EIGEN_MAX_ALIGN_BYTES when that one is smaller.
136#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
137#undef EIGEN_MAX_STATIC_ALIGN_BYTES
138#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// With a static limit of 0, define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT unless already defined.
141#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
142 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// Fixed-size shorthands that expand to EIGEN_ALIGN_TO_BOUNDARY with 8, 16, 32 and 64.
152#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
153#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
154#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
155#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX requests EIGEN_MAX_STATIC_ALIGN_BYTES alignment when that value is positive;
// the alternative definition expands to nothing.
// NOTE(review): the #else/#endif around the empty definition are not visible in this listing.
156#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
157#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
159#define EIGEN_ALIGN_MAX
// Resolve EIGEN_MAX_ALIGN_BYTES and derive EIGEN_DEFAULT_ALIGN_BYTES from it.
// NOTE(review): #else/#endif lines appear to be missing from this listing (number gaps
// 166 -> 169, 171 -> 173, 175 -> 178, 179 -> 181) -- confirm against the upstream header.

// Reject EIGEN_DONT_ALIGN combined with a positive EIGEN_MAX_ALIGN_BYTES.
165#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
166#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
// EIGEN_DONT_ALIGN forces the value to 0, dropping any earlier definition first.
169#ifdef EIGEN_DONT_ALIGN
170 #ifdef EIGEN_MAX_ALIGN_BYTES
171 #undef EIGEN_MAX_ALIGN_BYTES
173 #define EIGEN_MAX_ALIGN_BYTES 0
// Not set by the user: default to the ideal alignment.
174#elif !defined(EIGEN_MAX_ALIGN_BYTES)
175 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
// EIGEN_DEFAULT_ALIGN_BYTES takes the ideal value when it exceeds EIGEN_MAX_ALIGN_BYTES;
// the alternative definition uses EIGEN_MAX_ALIGN_BYTES.
178#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
179#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
181#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless it was defined beforehand.
185#ifndef EIGEN_UNALIGNED_VECTORIZE
186#define EIGEN_UNALIGNED_VECTORIZE 1
// A maximum alignment of 0 defines EIGEN_DONT_VECTORIZE if it is not defined yet.
// NOTE(review): the closing #endif lines are not visible in this listing.
193#if EIGEN_MAX_ALIGN_BYTES==0
194 #ifndef EIGEN_DONT_VECTORIZE
195 #define EIGEN_DONT_VECTORIZE
// Record SSE2 availability in two helper macros.
// NOTE(review): the #else/#endif lines separating the two cases are not visible in this listing.
// EIGEN_COMP_MSVC >= 1500: SSE2 counts as available when _M_IX86_FP >= 2 or EIGEN_ARCH_x86_64 is set.
204 #if (EIGEN_COMP_MSVC >= 1500)
206 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
207 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
// __SSE2__ defined, and the compiler is either not EIGEN_COMP_GNUC, is EIGEN_COMP_ICC, or
// satisfies EIGEN_GNUC_AT_LEAST(4,2).
211 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
212 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
// Enable the x86 SIMD feature macros, unless EIGEN_DONT_VECTORIZE or EIGEN_GPUCC is defined.
// NOTE(review): the guards for the individual instruction sets (the lines between the fused
// listing numbers, e.g. 225 -> 232 -> 235) are not visible here; the grouping below is inferred
// from the macro names only -- confirm against the upstream header.
216#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
// Either SSE2 helper macro is defined: turn on the baseline SSE/SSE2 path.
218 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
223 #define EIGEN_VECTORIZE
224 #define EIGEN_VECTORIZE_SSE
225 #define EIGEN_VECTORIZE_SSE2
// Individual SSE3 .. SSE4.2 switches (their guards are not visible).
232 #define EIGEN_VECTORIZE_SSE3
235 #define EIGEN_VECTORIZE_SSSE3
238 #define EIGEN_VECTORIZE_SSE4_1
241 #define EIGEN_VECTORIZE_SSE4_2
// AVX group: AVX itself is skipped when EIGEN_USE_SYCL is defined; the SSE3 .. SSE4.2
// macros are defined again alongside it.
244 #ifndef EIGEN_USE_SYCL
245 #define EIGEN_VECTORIZE_AVX
247 #define EIGEN_VECTORIZE_SSE3
248 #define EIGEN_VECTORIZE_SSSE3
249 #define EIGEN_VECTORIZE_SSE4_1
250 #define EIGEN_VECTORIZE_SSE4_2
// AVX2 group: same pattern, additionally defining AVX2.
253 #ifndef EIGEN_USE_SYCL
254 #define EIGEN_VECTORIZE_AVX2
255 #define EIGEN_VECTORIZE_AVX
257 #define EIGEN_VECTORIZE_SSE3
258 #define EIGEN_VECTORIZE_SSSE3
259 #define EIGEN_VECTORIZE_SSE4_1
260 #define EIGEN_VECTORIZE_SSE4_2
// FMA is enabled when __FMA__ is defined, or with EIGEN_COMP_MSVC when __AVX2__ is defined.
262 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
265 #define EIGEN_VECTORIZE_FMA
// AVX512 configuration, entered when __AVX512F__ is defined.
// NOTE(review): several guard lines are missing from this listing (number gaps 268 -> 270 -> 272,
// 285 -> 287 -> 290) -- confirm the exact conditions against the upstream header.
267 #if defined(__AVX512F__)
// AVX512 without EIGEN_VECTORIZE_FMA is rejected. Two wordings of the diagnostic exist;
// the condition choosing between them is not visible here.
268 #ifndef EIGEN_VECTORIZE_FMA
270 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
272 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
// The AVX512/AVX2/AVX macros follow an #ifndef EIGEN_USE_SYCL test; the extent of that
// guard is not visible.
275 #ifndef EIGEN_USE_SYCL
276 #define EIGEN_VECTORIZE_AVX512
277 #define EIGEN_VECTORIZE_AVX2
278 #define EIGEN_VECTORIZE_AVX
280 #define EIGEN_VECTORIZE_FMA
281 #define EIGEN_VECTORIZE_SSE3
282 #define EIGEN_VECTORIZE_SSSE3
283 #define EIGEN_VECTORIZE_SSE4_1
284 #define EIGEN_VECTORIZE_SSE4_2
// Optional AVX512 extensions; the feature tests guarding DQ and ER are not visible here.
285 #ifndef EIGEN_USE_SYCL
287 #define EIGEN_VECTORIZE_AVX512DQ
290 #define EIGEN_VECTORIZE_AVX512ER
// BF16 support follows the compiler's __AVX512BF16__ macro.
292 #ifdef __AVX512BF16__
293 #define EIGEN_VECTORIZE_AVX512BF16
// Workaround for one specific toolchain: __apple_build_version__ == 11000033 with a minimum
// macOS target of 101500. Per the warning text below, that clang generates broken assembly
// with AVX enabled, so AVX and every macro that builds on it are undefined again.
// NOTE(review): the #endif lines closing each #ifdef are not visible in this listing.
299 #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
// The warning is emitted after EIGEN_VECTORIZE_AVX is undefined.
302 #ifdef EIGEN_VECTORIZE_AVX
303 #undef EIGEN_VECTORIZE_AVX
304 #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
305 #ifdef EIGEN_VECTORIZE_AVX2
306 #undef EIGEN_VECTORIZE_AVX2
308 #ifdef EIGEN_VECTORIZE_FMA
309 #undef EIGEN_VECTORIZE_FMA
311 #ifdef EIGEN_VECTORIZE_AVX512
312 #undef EIGEN_VECTORIZE_AVX512
314 #ifdef EIGEN_VECTORIZE_AVX512DQ
315 #undef EIGEN_VECTORIZE_AVX512DQ
317 #ifdef EIGEN_VECTORIZE_AVX512ER
318 #undef EIGEN_VECTORIZE_AVX512ER
// Pull in the x86 intrinsics headers that match the enabled feature macros.
// NOTE(review): the #else/#endif lines are not visible in this listing; presumably the
// per-feature includes below form the alternative to the ICC branch -- confirm upstream.
// EIGEN_COMP_ICC >= 1110: include <immintrin.h>.
342 #if EIGEN_COMP_ICC >= 1110
343 #include <immintrin.h>
// Baseline headers, followed by one header per enabled SSE extension.
345 #include <mmintrin.h>
346 #include <emmintrin.h>
347 #include <xmmintrin.h>
348 #ifdef EIGEN_VECTORIZE_SSE3
349 #include <pmmintrin.h>
351 #ifdef EIGEN_VECTORIZE_SSSE3
352 #include <tmmintrin.h>
354 #ifdef EIGEN_VECTORIZE_SSE4_1
355 #include <smmintrin.h>
357 #ifdef EIGEN_VECTORIZE_SSE4_2
358 #include <nmmintrin.h>
// AVX or AVX512 enabled: include <immintrin.h>.
360 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
361 #include <immintrin.h>
// Non-x86 branches of the vectorization chain: each defines EIGEN_VECTORIZE plus one
// architecture-specific macro.
// NOTE(review): gaps in the fused listing numbers (369 -> 377, 380 -> 388) suggest lines were
// dropped inside the VSX and AltiVec branches -- confirm against the upstream header.
// __VSX__ defined.
366 #elif defined __VSX__
368 #define EIGEN_VECTORIZE
369 #define EIGEN_VECTORIZE_VSX
// __ALTIVEC__ defined.
377 #elif defined __ALTIVEC__
379 #define EIGEN_VECTORIZE
380 #define EIGEN_VECTORIZE_ALTIVEC
// __ARM_NEON or __ARM_NEON__ defined, and EIGEN_ARM64_USE_SVE not defined: use NEON and
// include <arm_neon.h>.
388 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
390 #define EIGEN_VECTORIZE
391 #define EIGEN_VECTORIZE_NEON
392 #include <arm_neon.h>
396 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
398 #define EIGEN_VECTORIZE
399 #define EIGEN_VECTORIZE_SVE
404 #if defined __ARM_FEATURE_SVE_BITS
405 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
407#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
// __s390x__ and __VEC__ both defined: enable the ZVECTOR path and include <vecintrin.h>.
410#elif (defined __s390x__ && defined __VEC__)
412#define EIGEN_VECTORIZE
413#define EIGEN_VECTORIZE_ZVECTOR
414#include <vecintrin.h>
// __mips_msa defined: the MSA macros follow a little-endian byte-order test.
// NOTE(review): the listing number jumps 420 -> 426, so the rest of that test (its #else/#endif
// and any diagnostic) is not visible here -- confirm against the upstream header.
416#elif defined __mips_msa
420#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
426#define EIGEN_VECTORIZE
427#define EIGEN_VECTORIZE_MSA
// Half-precision support headers.
// NOTE(review): #endif/#else lines are not visible in this listing.
// EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC defined: include <arm_fp16.h>.
437#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
438 #include <arm_fp16.h>
// __F16C__ defined, EIGEN_GPUCC not defined, and EIGEN_COMP_CLANG either not defined or
// at least 380: define EIGEN_HAS_FP16_C.
441#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
443 #define EIGEN_HAS_FP16_C
// <immintrin.h> is included after a test on EIGEN_COMP_CLANG; the lines between listing
// numbers 445 and 449 are missing, so the exact guard is not visible.
445 #if defined(EIGEN_COMP_CLANG)
449 #include <immintrin.h>
// GPU compiler configuration.
// NOTE(review): the #endif lines closing each conditional are not visible in this listing.
// EIGEN_CUDACC defined: define EIGEN_VECTORIZE_GPU, include <vector_types.h>, and define
// EIGEN_HAS_CUDA_FP16 when EIGEN_CUDA_SDK_VER >= 70500.
453#if defined EIGEN_CUDACC
454 #define EIGEN_VECTORIZE_GPU
455 #include <vector_types.h>
456 #if EIGEN_CUDA_SDK_VER >= 70500
457 #define EIGEN_HAS_CUDA_FP16
// EIGEN_HAS_CUDA_FP16 defined: include the CUDA runtime API and FP16 headers.
461#if defined(EIGEN_HAS_CUDA_FP16)
462 #include <cuda_runtime_api.h>
463 #include <cuda_fp16.h>
// EIGEN_HIPCC defined: define EIGEN_VECTORIZE_GPU and EIGEN_HAS_HIP_FP16, and include the
// HIP vector-type and FP16 headers.
466#if defined(EIGEN_HIPCC)
467 #define EIGEN_VECTORIZE_GPU
468 #include <hip/hip_vector_types.h>
469 #define EIGEN_HAS_HIP_FP16
470 #include <hip/hip_fp16.h>
// Body fragment of the function that reports the SIMD instruction sets in use: the first
// EIGEN_VECTORIZE_* macro that is defined selects the returned string.
// NOTE(review): the function header and the return lines for SSE2, ALTIVEC, VSX, NEON, SVE
// and MSA (plus the closing #else/#endif) are not visible in this listing.
478#if defined(EIGEN_VECTORIZE_AVX512)
479 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
480#elif defined(EIGEN_VECTORIZE_AVX)
// A comma was missing after "AVX"; every other list here is comma-separated.
481 return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
482#elif defined(EIGEN_VECTORIZE_SSE4_2)
483 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
484#elif defined(EIGEN_VECTORIZE_SSE4_1)
485 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
486#elif defined(EIGEN_VECTORIZE_SSSE3)
487 return "SSE, SSE2, SSE3, SSSE3";
488#elif defined(EIGEN_VECTORIZE_SSE3)
489 return "SSE, SSE2, SSE3";
490#elif defined(EIGEN_VECTORIZE_SSE2)
492#elif defined(EIGEN_VECTORIZE_ALTIVEC)
494#elif defined(EIGEN_VECTORIZE_VSX)
496#elif defined(EIGEN_VECTORIZE_NEON)
498#elif defined(EIGEN_VECTORIZE_SVE)
500#elif defined(EIGEN_VECTORIZE_ZVECTOR)
501 return "S390X ZVECTOR";
502#elif defined(EIGEN_VECTORIZE_MSA)
Namespace containing all symbols from the Eigen library.
Definition: Core:141
static const char * SimdInstructionSetsInUse(void)
Definition: ConfigureVectorization.h:477