10#ifndef EIGEN_PACKET_MATH_AVX_H
11#define EIGEN_PACKET_MATH_AVX_H
17#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
18#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
21#if !defined(EIGEN_VECTORIZE_AVX512) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
22#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
25#ifdef EIGEN_VECTORIZE_FMA
26#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
27#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
// Declares a const Packet8f named p8f_<NAME>, initialized with pset1<Packet8f>(X).
// The expansion carries no trailing semicolon; the use site must supply it.
43#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
44 const Packet8f p8f_##NAME = pset1<Packet8f>(X)
// Declares a const Packet4d named p4d_<NAME>, initialized with pset1<Packet4d>(X).
// The expansion carries no trailing semicolon; the use site must supply it.
46#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
47 const Packet4d p4d_##NAME = pset1<Packet4d>(X)
// Declares a const Packet8f named p8f_<NAME> from an integer argument X:
// X is first turned into a Packet8i with pset1<Packet8i>(X), and that packet
// is then passed through _mm256_castsi256_ps to obtain the Packet8f value.
// The expansion carries no trailing semicolon; the use site must supply it.
49#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
50 const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
// Declares a const Packet8i named p8i_<NAME>, initialized with pset1<Packet8i>(X).
// The expansion carries no trailing semicolon; the use site must supply it.
52#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
53 const Packet8i p8i_##NAME = pset1<Packet8i>(X)
57#ifndef EIGEN_VECTORIZE_AVX512
58template<>
struct packet_traits<float> : default_packet_traits
89template<>
struct packet_traits<double> : default_packet_traits
157struct packet_traits<
bfloat16> : default_packet_traits {
// Explicit specialization of scalar_div_cost for <float, true>; it exposes the
// compile-time constant `value` = 14.
// NOTE(review): presumably the cost estimate used for vectorized float
// division -- confirm the meaning of the `true` argument against the primary template.
201template<>
struct scalar_div_cost<float,true> {
enum {
value = 14 }; };
// Explicit specialization of scalar_div_cost for <double, true>; it exposes the
// compile-time constant `value` = 16.
// NOTE(review): presumably the cost estimate used for vectorized double
// division -- confirm the meaning of the `true` argument against the primary template.
202template<>
struct scalar_div_cost<double,true> {
enum {
value = 16 }; };
235 return _mm_packs_epi32(_mm256_extractf128_si256(_mm256_castps_si256(rf), 0),
236 _mm256_extractf128_si256(_mm256_castps_si256(rf), 1));
265#ifdef EIGEN_VECTORIZE_AVX2
266 return _mm256_add_epi32(a,
b);
268 __m128i lo = _mm_add_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(
b, 0));
269 __m128i hi = _mm_add_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(
b, 1));
270 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
277#ifdef EIGEN_VECTORIZE_AVX2
278 return _mm256_sub_epi32(a,
b);
280 __m128i lo = _mm_sub_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(
b, 0));
281 __m128i hi = _mm_sub_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(
b, 1));
282 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
288 return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
292 return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
302#ifdef EIGEN_VECTORIZE_AVX2
303 return _mm256_mullo_epi32(a,
b);
305 const __m128i lo = _mm_mullo_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(
b, 0));
306 const __m128i hi = _mm_mullo_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(
b, 1));
307 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
314{
eigen_assert(
false &&
"packet integer division are not supported by AVX");
318#ifdef EIGEN_VECTORIZE_FMA
320#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
327 __asm__(
"vfmadd231ps %[a], %[b], %[c]" : [
c]
"+x" (res) : [a]
"x" (a), [
b]
"x" (
b));
330 return _mm256_fmadd_ps(a,
b,
c);
334#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
337 __asm__(
"vfmadd231pd %[a], %[b], %[c]" : [
c]
"+x" (res) : [a]
"x" (a), [
b]
"x" (
b));
340 return _mm256_fmadd_pd(a,
b,
c);
357#ifdef EIGEN_VECTORIZE_AVX2
358 return _mm256_cmpeq_epi32(a,
b);
360 __m128i lo = _mm_cmpeq_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(
b, 0));
361 __m128i hi = _mm_cmpeq_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(
b, 1));
362 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
367#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
373 asm(
"vminps %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
377 return _mm256_min_ps(
b,a);
381#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
384 asm(
"vminpd %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
388 return _mm256_min_pd(
b,a);
393#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
396 asm(
"vmaxps %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
400 return _mm256_max_ps(
b,a);
404#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
407 asm(
"vmaxpd %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
411 return _mm256_max_pd(
b,a);
460#ifdef EIGEN_VECTORIZE_AVX2
462 return _mm256_cmpeq_epi32(a,a);
464 const __m256
b = _mm256_castsi256_ps(a);
465 return _mm256_castps_si256(_mm256_cmp_ps(
b,
b,_CMP_TRUE_UQ));
470#ifdef EIGEN_VECTORIZE_AVX2
472 const __m256i
b = _mm256_castps_si256(a);
473 return _mm256_castsi256_ps(_mm256_cmpeq_epi32(
b,
b));
475 return _mm256_cmp_ps(a,a,_CMP_TRUE_UQ);
480#ifdef EIGEN_VECTORIZE_AVX2
482 const __m256i
b = _mm256_castpd_si256(a);
483 return _mm256_castsi256_pd(_mm256_cmpeq_epi64(
b,
b));
485 return _mm256_cmp_pd(a,a,_CMP_TRUE_UQ);
492#ifdef EIGEN_VECTORIZE_AVX2
493 return _mm256_and_si256(a,
b);
495 return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(
b)));
502#ifdef EIGEN_VECTORIZE_AVX2
503 return _mm256_or_si256(a,
b);
505 return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(
b)));
512#ifdef EIGEN_VECTORIZE_AVX2
513 return _mm256_xor_si256(a,
b);
515 return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a),_mm256_castsi256_ps(
b)));
522#ifdef EIGEN_VECTORIZE_AVX2
523 return _mm256_andnot_si256(
b,a);
525 return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(
b),_mm256_castsi256_ps(a)));
533 return _mm256_round_ps(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
539 return _mm256_round_pd(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
543{
return _mm256_blendv_ps(
b,a,mask); }
545{
return _mm256_blendv_pd(
b,a,mask); }
548#ifdef EIGEN_VECTORIZE_AVX2
549 return _mm256_srai_epi32(a, N);
551 __m128i lo = _mm_srai_epi32(_mm256_extractf128_si256(a, 0), N);
552 __m128i hi = _mm_srai_epi32(_mm256_extractf128_si256(a, 1), N);
553 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
558#ifdef EIGEN_VECTORIZE_AVX2
559 return _mm256_srli_epi32(a, N);
561 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(a, 0), N);
562 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(a, 1), N);
563 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
568#ifdef EIGEN_VECTORIZE_AVX2
569 return _mm256_slli_epi32(a, N);
571 __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(a, 0), N);
572 __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(a, 1), N);
573 return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
586 Packet8i mask = _mm256_set1_epi8(
static_cast<char>(umask));
587 const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
589 mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
602 Packet8f tmp = _mm256_broadcast_ps((
const __m128*)(
const void*)
from);
604 tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
606 return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
611 Packet4d tmp = _mm256_broadcast_pd((
const __m128d*)(
const void*)
from);
612 return _mm256_permute_pd(tmp, 3<<2);
618 Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(
from));
619 return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(
from+1), 1);
631 Packet8i mask = _mm256_set1_epi8(
static_cast<char>(umask));
632 const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
634 mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
642 return _mm256_set_ps(
from[7*stride],
from[6*stride],
from[5*stride],
from[4*stride],
647 return _mm256_set_pd(
from[3*stride],
from[2*stride],
from[1*stride],
from[0*stride]);
652 __m128 low = _mm256_extractf128_ps(
from, 0);
653 to[stride*0] = _mm_cvtss_f32(low);
654 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
655 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
656 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
658 __m128 high = _mm256_extractf128_ps(
from, 1);
659 to[stride*4] = _mm_cvtss_f32(high);
660 to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
661 to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
662 to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
666 __m128d low = _mm256_extractf128_pd(
from, 0);
667 to[stride*0] = _mm_cvtsd_f64(low);
668 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
669 __m128d high = _mm256_extractf128_pd(
from, 1);
670 to[stride*2] = _mm_cvtsd_f64(high);
671 to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
690#ifndef EIGEN_VECTORIZE_AVX512
697 return _mm_cvtss_f32(_mm256_castps256_ps128(a));
700 return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
703 return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
709 __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
710 return _mm256_permute2f128_ps(tmp, tmp, 1);
714 __m256d tmp = _mm256_shuffle_pd(a,a,5);
715 return _mm256_permute2f128_pd(tmp, tmp, 1);
719 __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
720 return _mm256_permute_pd(swap_halves,5);
727 const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
728 return _mm256_and_ps(a,mask);
732 const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
733 return _mm256_and_pd(a,mask);
745 __m256i a_expo = _mm256_castpd_si256(
pand(a, cst_exp_mask));
746#ifdef EIGEN_VECTORIZE_AVX2
747 a_expo = _mm256_srli_epi64(a_expo, 52);
748 __m128i lo = _mm256_extractf128_si256(a_expo, 0);
749 __m128i hi = _mm256_extractf128_si256(a_expo, 1);
751 __m128i lo = _mm256_extractf128_si256(a_expo, 0);
752 __m128i hi = _mm256_extractf128_si256(a_expo, 1);
753 lo = _mm_srli_epi64(lo, 52);
754 hi = _mm_srli_epi64(hi, 52);
758 Packet4d exponent = _mm256_insertf128_pd(_mm256_setzero_pd(), exponent_lo, 0);
759 exponent = _mm256_insertf128_pd(exponent, exponent_hi, 1);
783 Packet4i lo = _mm_slli_epi64(hi, 52);
784 hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
785 Packet4d c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
791 lo = _mm_slli_epi64(hi, 52);
792 hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
793 c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
800 return predux(
Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
804 return predux(
Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
809 return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
815 tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
816 tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
817 return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
822 tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
823 return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
828 Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
829 tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
830 return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
834 Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
835 return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
840 Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
841 tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
842 return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
847 Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
848 return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
859 return _mm256_movemask_ps(x)!=0;
864 __m256 T0 = _mm256_unpacklo_ps(kernel.
packet[0], kernel.
packet[1]);
865 __m256 T1 = _mm256_unpackhi_ps(kernel.
packet[0], kernel.
packet[1]);
866 __m256 T2 = _mm256_unpacklo_ps(kernel.
packet[2], kernel.
packet[3]);
867 __m256 T3 = _mm256_unpackhi_ps(kernel.
packet[2], kernel.
packet[3]);
868 __m256 T4 = _mm256_unpacklo_ps(kernel.
packet[4], kernel.
packet[5]);
869 __m256 T5 = _mm256_unpackhi_ps(kernel.
packet[4], kernel.
packet[5]);
870 __m256 T6 = _mm256_unpacklo_ps(kernel.
packet[6], kernel.
packet[7]);
871 __m256 T7 = _mm256_unpackhi_ps(kernel.
packet[6], kernel.
packet[7]);
872 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
873 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
874 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
875 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
876 __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
877 __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
878 __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
879 __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
880 kernel.
packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
881 kernel.
packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
882 kernel.
packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
883 kernel.
packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
884 kernel.
packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
885 kernel.
packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
886 kernel.
packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
887 kernel.
packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
892 __m256 T0 = _mm256_unpacklo_ps(kernel.
packet[0], kernel.
packet[1]);
893 __m256 T1 = _mm256_unpackhi_ps(kernel.
packet[0], kernel.
packet[1]);
894 __m256 T2 = _mm256_unpacklo_ps(kernel.
packet[2], kernel.
packet[3]);
895 __m256 T3 = _mm256_unpackhi_ps(kernel.
packet[2], kernel.
packet[3]);
897 __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
898 __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
899 __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
900 __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
902 kernel.
packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
903 kernel.
packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
904 kernel.
packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
905 kernel.
packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
910 __m256d T0 = _mm256_shuffle_pd(kernel.
packet[0], kernel.
packet[1], 15);
911 __m256d T1 = _mm256_shuffle_pd(kernel.
packet[0], kernel.
packet[1], 0);
912 __m256d T2 = _mm256_shuffle_pd(kernel.
packet[2], kernel.
packet[3], 15);
913 __m256d T3 = _mm256_shuffle_pd(kernel.
packet[2], kernel.
packet[3], 0);
915 kernel.
packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
916 kernel.
packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
917 kernel.
packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
918 kernel.
packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
922 const __m256 zero = _mm256_setzero_ps();
924 __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
925 return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
928 const __m256d zero = _mm256_setzero_pd();
929 const __m256d select = _mm256_set_pd(ifPacket.
select[3], ifPacket.
select[2], ifPacket.
select[1], ifPacket.
select[0]);
930 __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
931 return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
939 return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(
from));
943 return numext::bit_cast<Eigen::half>(
static_cast<numext::uint16_t>(_mm_extract_epi16(
from, 0)));
947 return _mm_load_si128(
reinterpret_cast<const __m128i*
>(
from));
951 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
from));
955 _mm_store_si128(
reinterpret_cast<__m128i*
>(to),
from);
959 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(to),
from);
968 return _mm_set_epi16(d, d,
c,
c,
b,
b, a, a);
975 return _mm_set_epi16(
b,
b,
b,
b, a, a, a, a);
979 return _mm_cmpeq_epi32(a, a);
984 const __m128i sign_mask = _mm_set1_epi16(
static_cast<numext::uint16_t>(0x8000));
985 return _mm_andnot_si128(sign_mask, a);
989#ifdef EIGEN_HAS_FP16_C
990 return _mm256_cvtph_ps(a);
1003 return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);
1008#ifdef EIGEN_HAS_FP16_C
1009 return _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
1021 return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
1045 return _mm_or_si128(a,
b);
1048 return _mm_xor_si128(a,
b);
1051 return _mm_and_si128(a,
b);
1054 return _mm_andnot_si128(
b,a);
1058 return _mm_blendv_epi8(
b, a, mask);
1097 return _mm_xor_si128(a, sign_mask);
1138 return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
1145 to[stride*0] = aux[0];
1146 to[stride*1] = aux[1];
1147 to[stride*2] = aux[2];
1148 to[stride*3] = aux[3];
1149 to[stride*4] = aux[4];
1150 to[stride*5] = aux[5];
1151 to[stride*6] = aux[6];
1152 to[stride*7] = aux[7];
1181 __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
1182 return _mm_shuffle_epi8(a,m);
1187 __m128i a = kernel.
packet[0];
1190 __m128i d = kernel.
packet[3];
1192 __m128i f = kernel.
packet[5];
1193 __m128i g = kernel.
packet[6];
1196 __m128i a03b03 = _mm_unpacklo_epi16(a,
b);
1197 __m128i c03d03 = _mm_unpacklo_epi16(
c, d);
1198 __m128i e03f03 = _mm_unpacklo_epi16(
e, f);
1199 __m128i g03h03 = _mm_unpacklo_epi16(g,
h);
1200 __m128i a47b47 = _mm_unpackhi_epi16(a,
b);
1201 __m128i c47d47 = _mm_unpackhi_epi16(
c, d);
1202 __m128i e47f47 = _mm_unpackhi_epi16(
e, f);
1203 __m128i g47h47 = _mm_unpackhi_epi16(g,
h);
1205 __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
1206 __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
1207 __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
1208 __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
1209 __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
1210 __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
1211 __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
1212 __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
1214 __m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
1215 __m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
1216 __m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
1217 __m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
1218 __m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
1219 __m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
1220 __m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
1221 __m128i a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
1223 kernel.
packet[0] = a0b0c0d0e0f0g0h0;
1224 kernel.
packet[1] = a1b1c1d1e1f1g1h1;
1225 kernel.
packet[2] = a2b2c2d2e2f2g2h2;
1226 kernel.
packet[3] = a3b3c3d3e3f3g3h3;
1227 kernel.
packet[4] = a4b4c4d4e4f4g4h4;
1228 kernel.
packet[5] = a5b5c5d5e5f5g5h5;
1229 kernel.
packet[6] = a6b6c6d6e6f6g6h6;
1230 kernel.
packet[7] = a7b7c7d7e7f7g7h7;
1236 pstore<Eigen::half>(in[0], kernel.
packet[0]);
1237 pstore<Eigen::half>(in[1], kernel.
packet[1]);
1238 pstore<Eigen::half>(in[2], kernel.
packet[2]);
1239 pstore<Eigen::half>(in[3], kernel.
packet[3]);
1243 for (
int i = 0; i < 4; ++i) {
1244 for (
int j = 0; j < 4; ++j) {
1245 out[i][j] = in[j][2*i];
1247 for (
int j = 0; j < 4; ++j) {
1248 out[i][j+4] = in[j][2*i+1];
1261#ifdef EIGEN_VECTORIZE_AVX2
1262 __m256i extend = _mm256_cvtepu16_epi32(a);
1263 return _mm256_castsi256_ps(_mm256_slli_epi32(extend, 16));
1265 __m128i lo = _mm_cvtepu16_epi32(a);
1266 __m128i hi = _mm_cvtepu16_epi32(_mm_srli_si128(a, 8));
1267 __m128i lo_shift = _mm_slli_epi32(lo, 16);
1268 __m128i hi_shift = _mm_slli_epi32(hi, 16);
1269 return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo_shift), hi_shift, 1));
1277 __m256i input = _mm256_castps_si256(a);
1279#ifdef EIGEN_VECTORIZE_AVX2
1281 __m256i t = _mm256_srli_epi32(input, 16);
1283 t = _mm256_and_si256(t, _mm256_set1_epi32(1));
1285 t = _mm256_add_epi32(t, _mm256_set1_epi32(0x7fff));
1287 t = _mm256_add_epi32(t, input);
1289 t = _mm256_srli_epi32(t, 16);
1291 __m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);
1292 __m256i nan = _mm256_set1_epi32(0x7fc0);
1293 t = _mm256_blendv_epi8(nan, t, _mm256_castps_si256(mask));
1295 return _mm_packus_epi32(_mm256_extractf128_si256(t, 0),
1296 _mm256_extractf128_si256(t, 1));
1299 __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(input, 0), 16);
1300 __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(input, 1), 16);
1302 lo = _mm_and_si128(lo, _mm_set1_epi32(1));
1303 hi = _mm_and_si128(hi, _mm_set1_epi32(1));
1305 lo = _mm_add_epi32(lo, _mm_set1_epi32(0x7fff));
1306 hi = _mm_add_epi32(hi, _mm_set1_epi32(0x7fff));
1308 lo = _mm_add_epi32(lo, _mm256_extractf128_si256(input, 0));
1309 hi = _mm_add_epi32(hi, _mm256_extractf128_si256(input, 1));
1311 lo = _mm_srli_epi32(lo, 16);
1312 hi = _mm_srli_epi32(hi, 16);
1314 __m256 mask = _mm256_cmp_ps(a, a, _CMP_ORD_Q);
1315 __m128i nan = _mm_set1_epi32(0x7fc0);
1316 lo = _mm_blendv_epi8(nan, lo, _mm_castps_si128(_mm256_castps256_ps128(mask)));
1317 hi = _mm_blendv_epi8(nan, hi, _mm_castps_si128(_mm256_extractf128_ps(mask, 1)));
1319 return _mm_packus_epi32(lo, hi);
1324 return _mm_set1_epi16(numext::bit_cast<numext::uint16_t>(
from));
1332 return _mm_load_si128(
reinterpret_cast<const __m128i*
>(
from));
1336 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
from));
1340 _mm_store_si128(
reinterpret_cast<__m128i*
>(to),
from);
1344 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(to),
from);
1353 return _mm_set_epi16(d, d,
c,
c,
b,
b, a, a);
1360 return _mm_set_epi16(
b,
b,
b,
b, a, a, a, a);
1364 return _mm_cmpeq_epi32(a, a);
1369 const __m128i sign_mask = _mm_set1_epi16(
static_cast<numext::uint16_t>(0x8000));
1370 return _mm_andnot_si128(sign_mask, a);
1391 return _mm_or_si128(a,
b);
1394 return _mm_xor_si128(a,
b);
1397 return _mm_and_si128(a,
b);
1400 return _mm_andnot_si128(
b,a);
1404 return _mm_blendv_epi8(
b, a, mask);
1444 return _mm_xor_si128(a, sign_mask);
1474 return _mm_set_epi16(s7, s6, s5, s4, s3, s2, s1, s0);
1481 to[stride*0] = aux[0];
1482 to[stride*1] = aux[1];
1483 to[stride*2] = aux[2];
1484 to[stride*3] = aux[3];
1485 to[stride*4] = aux[4];
1486 to[stride*5] = aux[5];
1487 to[stride*6] = aux[6];
1488 to[stride*7] = aux[7];
1509 __m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
1510 return _mm_shuffle_epi8(a,m);
1515 __m128i a = kernel.
packet[0];
1518 __m128i d = kernel.
packet[3];
1520 __m128i f = kernel.
packet[5];
1521 __m128i g = kernel.
packet[6];
1524 __m128i a03b03 = _mm_unpacklo_epi16(a,
b);
1525 __m128i c03d03 = _mm_unpacklo_epi16(
c, d);
1526 __m128i e03f03 = _mm_unpacklo_epi16(
e, f);
1527 __m128i g03h03 = _mm_unpacklo_epi16(g,
h);
1528 __m128i a47b47 = _mm_unpackhi_epi16(a,
b);
1529 __m128i c47d47 = _mm_unpackhi_epi16(
c, d);
1530 __m128i e47f47 = _mm_unpackhi_epi16(
e, f);
1531 __m128i g47h47 = _mm_unpackhi_epi16(g,
h);
1533 __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
1534 __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
1535 __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
1536 __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
1537 __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
1538 __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
1539 __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
1540 __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
1542 kernel.
packet[0] = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
1543 kernel.
packet[1] = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
1544 kernel.
packet[2] = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
1545 kernel.
packet[3] = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
1546 kernel.
packet[4] = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
1547 kernel.
packet[5] = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
1548 kernel.
packet[6] = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
1549 kernel.
packet[7] = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
1554 __m128i a = kernel.
packet[0];
1557 __m128i d = kernel.
packet[3];
1559 __m128i ab_03 = _mm_unpacklo_epi16(a,
b);
1560 __m128i cd_03 = _mm_unpacklo_epi16(
c, d);
1561 __m128i ab_47 = _mm_unpackhi_epi16(a,
b);
1562 __m128i cd_47 = _mm_unpackhi_epi16(
c, d);
1564 kernel.
packet[0] = _mm_unpacklo_epi32(ab_03, cd_03);
1565 kernel.
packet[1] = _mm_unpackhi_epi32(ab_03, cd_03);
1566 kernel.
packet[2] = _mm_unpacklo_epi32(ab_47, cd_47);
1567 kernel.
packet[3] = _mm_unpackhi_epi32(ab_47, cd_47);
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:35
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:27
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:39
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:31
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:986
#define eigen_assert(x)
Definition: Macros.h:1047
#define EIGEN_FAST_MATH
Allows disabling some optimizations that might affect the accuracy of the result.
Definition: Macros.h:49
#define EIGEN_STRONG_INLINE
Definition: Macros.h:927
#define vec4i_swizzle1(v, p, q, r, s)
Definition: PacketMath.h:64
and restrictions which apply to each piece of software is included later in this file and or inside of the individual applicable source files The disclaimer of warranty in the WPILib license above applies to all code in and nothing in any of the other licenses gives permission to use the names of FIRST nor the names of the WPILib contributors to endorse or promote products derived from this software The following pieces of software have additional or alternate and or Google Inc All rights reserved Redistribution and use in source and binary with or without are permitted provided that the following conditions are this list of conditions and the following disclaimer *Redistributions in binary form must reproduce the above copyright this list of conditions and the following disclaimer in the documentation and or other materials provided with the distribution *Neither the name of Google Inc nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED BUT NOT LIMITED THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY OR CONSEQUENTIAL WHETHER IN STRICT OR EVEN IF ADVISED OF THE POSSIBILITY OF SUCH January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source 
documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
Definition: ThirdPartyNotices.txt:131
@ Aligned32
Data pointer is aligned on a 32 bytes boundary.
Definition: Constants.h:236
@ Aligned16
Data pointer is aligned on a 16-byte boundary.
Definition: Constants.h:235
EIGEN_STRONG_INLINE Packet8i ploadu< Packet8i >(const int *from)
Definition: PacketMath.h:583
EIGEN_STRONG_INLINE Eigen::half predux< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1155
__m256i Packet8i
Definition: PacketMath.h:32
EIGEN_STRONG_INLINE Packet4d print< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:450
EIGEN_STRONG_INLINE Packet8f pmax< PropagateNumbers, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:425
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:546
EIGEN_STRONG_INLINE Packet8bf print< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1412
__m128d Packet2d
Definition: PacketMath.h:43
EIGEN_STRONG_INLINE Packet8bf pgather< bfloat16, Packet8bf >(const bfloat16 *from, Index stride)
Definition: PacketMath.h:1464
EIGEN_STRONG_INLINE Packet8h pmax< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1032
EIGEN_STRONG_INLINE Packet8i psub< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:276
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Definition: GenericPacketMath.h:875
EIGEN_STRONG_INLINE Packet8h pmul< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1114
EIGEN_STRONG_INLINE Packet8f pmin< PropagateNumbers, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:417
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Definition: PacketMath.h:1007
EIGEN_STRONG_INLINE Packet8f pfloor< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:455
EIGEN_STRONG_INLINE Packet8f pmax< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:392
EIGEN_STRONG_INLINE Packet4d pset1< Packet4d >(const double &from)
Definition: PacketMath.h:241
EIGEN_STRONG_INLINE Packet8f pmax< PropagateNaN, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:441
EIGEN_STRONG_INLINE Packet8i padd< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:264
EIGEN_STRONG_INLINE void pstore1< Packet8i >(int *to, const int &a)
Definition: PacketMath.h:684
EIGEN_STRONG_INLINE Packet4d pfrexp< Packet4d >(const Packet4d &a, Packet4d &exponent)
Definition: PacketMath.h:764
EIGEN_STRONG_INLINE bfloat16 predux_mul< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1503
__m128 Packet4f
Definition: PacketMath.h:42
EIGEN_STRONG_INLINE Packet8bf pmin< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1374
EIGEN_STRONG_INLINE float predux_mul< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:812
EIGEN_STRONG_INLINE Packet8i pset1< Packet8i >(const int &from)
Definition: PacketMath.h:242
EIGEN_STRONG_INLINE __m128i Pack16To8(Packet8f rf)
Definition: PacketMath.h:234
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: PacketMath.h:259
EIGEN_STRONG_INLINE Packet8bf pceil< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1416
EIGEN_STRONG_INLINE Packet8bf pdiv< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1459
EIGEN_STRONG_INLINE Packet8f pselect< Packet8f >(const Packet8f &mask, const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:542
EIGEN_STRONG_INLINE Packet8bf pround< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1407
EIGEN_STRONG_INLINE void pstore< bfloat16 >(bfloat16 *to, const Packet4bf &from)
Definition: PacketMath.h:3441
EIGEN_STRONG_INLINE void pstore1< Packet8f >(float *to, const float &a)
Definition: PacketMath.h:674
EIGEN_STRONG_INLINE Packet8h print< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1065
EIGEN_STRONG_INLINE Packet8h pset1< Packet8h >(const Eigen::half &from)
Definition: PacketMath.h:938
EIGEN_STRONG_INLINE Packet8h psub< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1107
EIGEN_STRONG_INLINE Packet8f pfrexp< Packet8f >(const Packet8f &a, Packet8f &exponent)
Definition: PacketMath.h:736
EIGEN_STRONG_INLINE Packet8f pxor< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:509
EIGEN_STRONG_INLINE Packet4bf pxor(const Packet4bf &a, const Packet4bf &b)
Definition: PacketMath.h:3503
EIGEN_STRONG_INLINE Packet4d pceil< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:453
EIGEN_STRONG_INLINE Packet8h pfloor< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1073
EIGEN_STRONG_INLINE float predux_min< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:826
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet2d &from)
Definition: PacketMath.h:794
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: PacketMath.h:1118
EIGEN_STRONG_INLINE int pfirst< Packet8i >(const Packet8i &a)
Definition: PacketMath.h:702
EIGEN_STRONG_INLINE Packet8i por< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:501
EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i &a, const Packet4i &b, const Packet4i &c)
Definition: PacketMath.h:370
EIGEN_STRONG_INLINE Packet8f pset1frombits< Packet8f >(unsigned int from)
Definition: PacketMath.h:244
EIGEN_STRONG_INLINE Packet8f psub< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:274
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:244
EIGEN_DEVICE_FUNC void ptranspose(PacketBlock< Packet4f, 4 > &kernel)
Definition: PacketMath.h:1124
EIGEN_STRONG_INLINE Packet4d pldexp< Packet4d >(const Packet4d &a, const Packet4d &exponent)
Definition: PacketMath.h:772
EIGEN_STRONG_INLINE Packet4f predux_half_dowto4< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:807
EIGEN_STRONG_INLINE Packet8f pmin< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:366
EIGEN_STRONG_INLINE Packet4d pmin< PropagateNumbers, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:421
EIGEN_STRONG_INLINE Packet8f padd< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:262
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: PacketMath.h:599
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: PacketMath.h:1242
EIGEN_STRONG_INLINE double predux< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:802
EIGEN_STRONG_INLINE Packet8bf padd< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1447
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:431
EIGEN_STRONG_INLINE Packet8h plset< Packet8h >(const half &a)
Definition: PacketMath.h:1038
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: PacketMath.h:598
EIGEN_STRONG_INLINE Packet4d pdiv< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:312
EIGEN_STRONG_INLINE Packet4bf pand(const Packet4bf &a, const Packet4bf &b)
Definition: PacketMath.h:3507
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:555
EIGEN_STRONG_INLINE void pstore1< Packet4d >(double *to, const double &a)
Definition: PacketMath.h:679
EIGEN_STRONG_INLINE Packet8h pdiv< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1121
EIGEN_STRONG_INLINE Packet8i pdiv< Packet8i >(const Packet8i &, const Packet8i &)
Definition: PacketMath.h:313
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
Definition: PacketMath.h:791
EIGEN_STRONG_INLINE Packet8bf plset< Packet8bf >(const bfloat16 &a)
Definition: PacketMath.h:1386
EIGEN_STRONG_INLINE Packet8bf pmul< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1455
EIGEN_STRONG_INLINE Packet8bf ploaddup< Packet8bf >(const bfloat16 *from)
Definition: PacketMath.h:1348
EIGEN_STRONG_INLINE Packet8f ploaddup< Packet8f >(const float *from)
Definition: PacketMath.h:594
EIGEN_STRONG_INLINE Packet8f pandnot< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:519
EIGEN_STRONG_INLINE float predux< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:798
EIGEN_STRONG_INLINE Packet8f plset< Packet8f >(const float &a)
Definition: PacketMath.h:259
EIGEN_STRONG_INLINE Packet4d pmax< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:403
EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h &a)
Definition: PacketMath.h:988
EIGEN_STRONG_INLINE Packet8f ploadu< Packet8f >(const float *from)
Definition: PacketMath.h:581
EIGEN_STRONG_INLINE double predux_min< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:832
EIGEN_STRONG_INLINE Packet2d pfrexp_generic_get_biased_exponent(const Packet2d &a)
Definition: PacketMath.h:912
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: PacketMath.h:978
EIGEN_STRONG_INLINE Packet4d pmin< PropagateNaN, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:437
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
EIGEN_STRONG_INLINE Eigen::half predux_min< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1167
EIGEN_STRONG_INLINE double predux_max< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:845
EIGEN_STRONG_INLINE Packet8f pload< Packet8f >(const float *from)
Definition: PacketMath.h:577
EIGEN_STRONG_INLINE Packet4d ptrue< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:479
EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:432
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet2d &from)
Definition: PacketMath.h:790
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: PacketMath.h:868
EIGEN_STRONG_INLINE Packet4d ploadu< Packet4d >(const double *from)
Definition: PacketMath.h:582
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: PacketMath.h:597
EIGEN_STRONG_INLINE Packet8i pload< Packet8i >(const int *from)
Definition: PacketMath.h:579
EIGEN_STRONG_INLINE Packet8f ploadquad< Packet8f >(const float *from)
Definition: PacketMath.h:616
EIGEN_STRONG_INLINE Packet4d pmul< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:300
EIGEN_STRONG_INLINE Packet8i pxor< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:511
eigen_packet_wrapper< __m128i, 2 > Packet8h
Definition: PacketMath.h:34
EIGEN_STRONG_INLINE Packet4d ploaddup< Packet4d >(const double *from)
Definition: PacketMath.h:609
EIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f &p)
Definition: PacketMath.h:3387
EIGEN_STRONG_INLINE Packet8f pceil< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:452
EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:434
EIGEN_STRONG_INLINE Packet8f por< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:499
EIGEN_STRONG_INLINE float pfirst< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:696
EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f &)
Definition: PacketMath.h:270
EIGEN_STRONG_INLINE Packet8h pceil< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1069
EIGEN_STRONG_INLINE float predux_max< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:838
EIGEN_STRONG_INLINE Packet8f pand< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:489
EIGEN_STRONG_INLINE Packet8bf pfloor< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1420
EIGEN_STRONG_INLINE Packet4f peven_mask(const Packet4f &)
Definition: PacketMath.h:266
const char * SsePrefetchPtrType
Definition: PacketMath.h:864
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: PacketMath.h:789
EIGEN_STRONG_INLINE Packet4d pselect< Packet4d >(const Packet4d &mask, const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:544
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: PacketMath.h:601
EIGEN_STRONG_INLINE Packet8i pandnot< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:521
EIGEN_STRONG_INLINE Packet4d pround< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:535
EIGEN_DEVICE_FUNC Packet4d pgather< double, Packet4d >(const double *from, Index stride)
Definition: PacketMath.h:645
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:696
EIGEN_STRONG_INLINE Eigen::half predux_max< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1161
EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf &a, const Packet4bf &b)
Definition: PacketMath.h:3499
EIGEN_STRONG_INLINE Packet4d plset< Packet4d >(const double &a)
Definition: PacketMath.h:260
EIGEN_DEVICE_FUNC void pscatter< double, Packet4d >(double *to, const Packet4d &from, Index stride)
Definition: PacketMath.h:664
EIGEN_STRONG_INLINE Packet8bf psub< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1451
EIGEN_STRONG_INLINE bfloat16 predux< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1491
EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f &a)
Definition: PacketMath.h:891
EIGEN_STRONG_INLINE Packet4d pmin< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:380
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Default implementation of pldexp.
Definition: GenericPacketMathFunctions.h:85
EIGEN_STRONG_INLINE Packet8bf pload< Packet8bf >(const bfloat16 *from)
Definition: PacketMath.h:1331
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
Definition: PacketMath.h:796
EIGEN_STRONG_INLINE Packet4d pand< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:490
EIGEN_STRONG_INLINE Packet8f pround< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:529
EIGEN_STRONG_INLINE Packet8f pmul< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:299
EIGEN_STRONG_INLINE Packet8f pldexp< Packet8f >(const Packet8f &a, const Packet8f &exponent)
Definition: PacketMath.h:768
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type pfirst(const Packet &a)
Definition: GenericPacketMath.h:844
EIGEN_STRONG_INLINE Packet8h pmin< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1026
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Default implementation of pfrexp.
Definition: GenericPacketMathFunctions.h:40
EIGEN_STRONG_INLINE Packet4bf pandnot(const Packet4bf &a, const Packet4bf &b)
Definition: PacketMath.h:3511
EIGEN_STRONG_INLINE Packet4d pmax< PropagateNumbers, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:429
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:222
EIGEN_STRONG_INLINE bfloat16 predux_min< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1499
EIGEN_STRONG_INLINE Packet4d pload< Packet4d >(const double *from)
Definition: PacketMath.h:578
EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f &a)
Definition: PacketMath.h:342
EIGEN_STRONG_INLINE Packet8f pload1< Packet8f >(const float *from)
Definition: PacketMath.h:256
EIGEN_STRONG_INLINE void pscatter< bfloat16, Packet8bf >(bfloat16 *to, const Packet8bf &from, Index stride)
Definition: PacketMath.h:1477
EIGEN_STRONG_INLINE Packet4d pxor< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:510
EIGEN_DEVICE_FUNC Packet8f pgather< float, Packet8f >(const float *from, Index stride)
Definition: PacketMath.h:640
EIGEN_STRONG_INLINE Packet8i ptrue< Packet8i >(const Packet8i &a)
Definition: PacketMath.h:459
EIGEN_STRONG_INLINE Packet8h ploaddup< Packet8h >(const Eigen::half *from)
Definition: PacketMath.h:963
EIGEN_STRONG_INLINE Packet8h ploadquad< Packet8h >(const Eigen::half *from)
Definition: PacketMath.h:972
EIGEN_STRONG_INLINE Packet4f Bf16ToF32(const Packet4bf &p)
Definition: PacketMath.h:3414
EIGEN_STRONG_INLINE Packet8h pround< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1061
EIGEN_STRONG_INLINE Packet8bf ploadquad< Packet8bf >(const bfloat16 *from)
Definition: PacketMath.h:1357
EIGEN_STRONG_INLINE Packet8h ploadu< Packet8h >(const Eigen::half *from)
Definition: PacketMath.h:950
EIGEN_STRONG_INLINE double pfirst< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:699
EIGEN_STRONG_INLINE Packet8f pmin< PropagateNaN, Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:433
EIGEN_STRONG_INLINE Packet4d padd< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:263
EIGEN_STRONG_INLINE Packet8f pdiv< Packet8f >(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:311
EIGEN_STRONG_INLINE Packet8h padd< Packet8h >(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1100
__m256 Packet8f
Definition: PacketMath.h:31
EIGEN_STRONG_INLINE Packet4d pset1frombits< Packet4d >(uint64_t from)
Definition: PacketMath.h:245
EIGEN_STRONG_INLINE double predux_mul< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:819
EIGEN_STRONG_INLINE bfloat16 pfirst< Packet8bf >(const Packet8bf &from)
Definition: PacketMath.h:1327
EIGEN_STRONG_INLINE Eigen::half pfirst< Packet8h >(const Packet8h &from)
Definition: PacketMath.h:942
DoublePacket< Packet > padd(const DoublePacket< Packet > &a, const DoublePacket< Packet > &b)
Definition: GeneralBlockPanelKernel.h:689
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: PacketMath.h:795
EIGEN_STRONG_INLINE Packet8f pset1< Packet8f >(const float &from)
Definition: PacketMath.h:240
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:433
EIGEN_STRONG_INLINE Packet8i pand< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:491
EIGEN_STRONG_INLINE Packet4d pfloor< Packet4d >(const Packet4d &a)
Definition: PacketMath.h:456
EIGEN_STRONG_INLINE Packet8bf ploadu< Packet8bf >(const bfloat16 *from)
Definition: PacketMath.h:1335
EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f &a)
Definition: PacketMath.h:322
EIGEN_STRONG_INLINE Packet4d por< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:500
EIGEN_DEVICE_FUNC Packet16b pselect(const Packet16b &mask, const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:391
EIGEN_STRONG_INLINE Packet8bf pmax< Packet8bf >(const Packet8bf &a, const Packet8bf &b)
Definition: PacketMath.h:1380
EIGEN_STRONG_INLINE Packet8bf pset1< Packet8bf >(const bfloat16 &from)
Definition: PacketMath.h:1323
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
Definition: PacketMath.h:870
EIGEN_STRONG_INLINE Packet4d pload1< Packet4d >(const double *from)
Definition: PacketMath.h:257
__m256d Packet4d
Definition: PacketMath.h:33
EIGEN_STRONG_INLINE Packet4d pandnot< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:520
EIGEN_STRONG_INLINE Packet4d psub< Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:275
EIGEN_STRONG_INLINE Packet8f ptrue< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:469
EIGEN_STRONG_INLINE Packet8i pmul< Packet8i >(const Packet8i &a, const Packet8i &b)
Definition: PacketMath.h:301
EIGEN_STRONG_INLINE bfloat16 predux_max< Packet8bf >(const Packet8bf &a)
Definition: PacketMath.h:1495
EIGEN_STRONG_INLINE Packet8h pload< Packet8h >(const Eigen::half *from)
Definition: PacketMath.h:946
eigen_packet_wrapper< __m128i, 3 > Packet8bf
Definition: PacketMath.h:35
EIGEN_DEVICE_FUNC void pscatter< float, Packet8f >(float *to, const Packet8f &from, Index stride)
Definition: PacketMath.h:650
EIGEN_STRONG_INLINE Packet4d pmax< PropagateNaN, Packet4d >(const Packet4d &a, const Packet4d &b)
Definition: PacketMath.h:445
EIGEN_STRONG_INLINE Packet8f print< Packet8f >(const Packet8f &a)
Definition: PacketMath.h:449
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: PacketMath.h:869
EIGEN_STRONG_INLINE Eigen::half predux_mul< Packet8h >(const Packet8h &a)
Definition: PacketMath.h:1173
EIGEN_STRONG_INLINE void pstoreu< bfloat16 >(bfloat16 *to, const Packet4bf &from)
Definition: PacketMath.h:3446
::uint64_t uint64_t
Definition: Meta.h:58
::uint16_t uint16_t
Definition: Meta.h:54
::uint32_t uint32_t
Definition: Meta.h:56
::uint8_t uint8_t
Definition: Meta.h:52
Namespace containing all symbols from the Eigen library.
Definition: MatrixExponential.h:16
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
Definition: Eigen_Colamd.h:50
static constexpr const unit_t< compound_unit< energy::joule, time::seconds > > h(6.626070040e-34)
Planck constant.
static constexpr const charge::coulomb_t e(1.6021766208e-19)
Elementary charge.
static constexpr const velocity::meters_per_second_t c(299792458.0)
Speed of light in vacuum.
Definition: BFloat16.h:58
Definition: GenericPacketMath.h:1014
Packet packet[N]
Definition: GenericPacketMath.h:1015
Definition: GenericPacketMath.h:1027
bool select[N]
Definition: GenericPacketMath.h:1028
Definition: GenericPacketMath.h:43
@ HasRsqrt
Definition: GenericPacketMath.h:67
@ HasSin
Definition: GenericPacketMath.h:75
@ HasBlend
Definition: GenericPacketMath.h:60
@ HasNdtri
Definition: GenericPacketMath.h:90
@ HasCos
Definition: GenericPacketMath.h:76
@ HasCmp
Definition: GenericPacketMath.h:63
@ HasLog1p
Definition: GenericPacketMath.h:71
@ HasCeil
Definition: GenericPacketMath.h:101
@ HasExp
Definition: GenericPacketMath.h:68
@ HasRound
Definition: GenericPacketMath.h:98
@ HasRint
Definition: GenericPacketMath.h:99
@ HasSqrt
Definition: GenericPacketMath.h:66
@ HasErf
Definition: GenericPacketMath.h:88
@ HasBessel
Definition: GenericPacketMath.h:91
@ HasExpm1
Definition: GenericPacketMath.h:69
@ HasLog
Definition: GenericPacketMath.h:70
@ HasTanh
Definition: GenericPacketMath.h:83
@ HasFloor
Definition: GenericPacketMath.h:100
@ HasDiv
Definition: GenericPacketMath.h:65
Definition: GenericPacketMath.h:160
Packet8h type
Definition: PacketMath.h:115
Packet8h half
Definition: PacketMath.h:117
Packet8bf half
Definition: PacketMath.h:161
Packet8bf type
Definition: PacketMath.h:158
Packet2d half
Definition: PacketMath.h:92
Packet4d type
Definition: PacketMath.h:91
Packet8f type
Definition: PacketMath.h:60
Packet4f half
Definition: PacketMath.h:61
Definition: GenericPacketMath.h:107
@ HasHalfPacket
Definition: GenericPacketMath.h:114
@ size
Definition: GenericPacketMath.h:112
@ AlignedOnScalar
Definition: GenericPacketMath.h:113
@ Vectorizable
Definition: GenericPacketMath.h:111
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Packet2d half
Definition: PacketMath.h:226
double type
Definition: PacketMath.h:225
uint8_t mask_t
Definition: PacketMath.h:221
Packet8i integer_packet
Definition: PacketMath.h:220
Packet4f half
Definition: PacketMath.h:219
float type
Definition: PacketMath.h:218
Definition: GenericPacketMath.h:133
T type
Definition: GenericPacketMath.h:134
T half
Definition: GenericPacketMath.h:135
@ masked_load_available
Definition: GenericPacketMath.h:141
@ size
Definition: GenericPacketMath.h:138
@ masked_store_available
Definition: GenericPacketMath.h:142
@ vectorizable
Definition: GenericPacketMath.h:140
@ alignment
Definition: GenericPacketMath.h:139