10#ifndef EIGEN_PACKET_MATH_SSE_H
11#define EIGEN_PACKET_MATH_SSE_H
17#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
18#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
21#if !defined(EIGEN_VECTORIZE_AVX) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
24#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
27#ifdef EIGEN_VECTORIZE_FMA
28#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
29#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
33#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
// Packet4f for this preprocessor branch: the raw __m128 register type is
// wrapped in eigen_packet_wrapper instead of being used as the packet type
// itself.
39typedef eigen_packet_wrapper<__m128>
Packet4f;
// Packet2d for this preprocessor branch: the raw __m128d register type is
// wrapped in eigen_packet_wrapper instead of being used as the packet type
// itself.
40typedef eigen_packet_wrapper<__m128d>
Packet2d;
55template<
int p,
int q,
int r,
int s>
57 enum {
mask = (s)<<6|(r)<<4|(q)<<2|(p) };
// Shuffles the four 32-bit lanes of the float vector `v` and wraps the result
// in a Packet4f. p,q,r,s are forwarded to shuffle_mask<p,q,r,s>::mask, which
// is used as the immediate of _mm_shuffle_epi32; `v` is cast to an integer
// vector for the shuffle and cast back to a float vector afterwards.
61#define vec4f_swizzle1(v,p,q,r,s) \
62 Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), (shuffle_mask<p,q,r,s>::mask))))
// Integer counterpart of the single-source swizzle: applies _mm_shuffle_epi32
// to `v` with shuffle_mask<p,q,r,s>::mask as the immediate and wraps the
// result in a Packet4i.
64#define vec4i_swizzle1(v,p,q,r,s) \
65 Packet4i(_mm_shuffle_epi32( v, (shuffle_mask<p,q,r,s>::mask)))
// Shuffles the two double lanes of `v` and wraps the result in a Packet2d.
// The shuffle is done on 32-bit lanes with _mm_shuffle_epi32, so double lane
// `p` is addressed as the 32-bit lane pair (2*p, 2*p+1) and likewise for `q`.
// `p` and `q` are parenthesized so that expression arguments (e.g. `i+1`)
// are scaled as a whole instead of being split by operator precedence.
#define vec2d_swizzle1(v,p,q) \
  Packet2d(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), (shuffle_mask<2*(p),2*(p)+1,2*(q),2*(q)+1>::mask))))
// Two-source float swizzle: calls _mm_shuffle_ps on `a` and `b` with
// shuffle_mask<p,q,r,s>::mask as the immediate and wraps the result in a
// Packet4f.
70#define vec4f_swizzle2(a,b,p,q,r,s) \
71 Packet4f(_mm_shuffle_ps( (a), (b), (shuffle_mask<p,q,r,s>::mask)))
// Two-source integer swizzle: `a` and `b` are cast to float vectors so that
// _mm_shuffle_ps can be used with shuffle_mask<p,q,r,s>::mask, then the
// result is cast back to an integer vector and wrapped in a Packet4i.
73#define vec4i_swizzle2(a,b,p,q,r,s) \
74 Packet4i(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), (shuffle_mask<p,q,r,s>::mask)))))
// Expands to vec4f_swizzle2 with `a` as both sources and `p` as all four
// lane selectors, i.e. every lane of the result is selected by the same
// index `p`.
92#define vec4f_duplane(a,p) \
93 vec4f_swizzle2(a,a,p,p,p,p)
// Two-source double swizzle: calls _mm_shuffle_pd on `a` and `b` with `mask`
// as the immediate and wraps the result in a Packet2d. Arguments are
// parenthesized, matching vec4f_swizzle2.
#define vec2d_swizzle2(a,b,mask) \
  Packet2d(_mm_shuffle_pd((a),(b),(mask)))

// Expands to vec2d_swizzle2 with `a` as both sources and the same selector
// `p` placed in both bits of the 2-bit immediate ((p)<<1 | (p)).
// `p` is parenthesized so that an expression argument whose operators bind
// more loosely than `<<` or `|` (e.g. `i & 1`, `c ? 1 : 0`) is evaluated as a
// whole before being shifted.
#define vec2d_duplane(a,p) \
  vec2d_swizzle2(a,a,((p)<<1)|(p))
// Declares a constant `const Packet4f p4f_<NAME>` initialized with
// pset1<Packet4f>(X). The expansion has no trailing semicolon; the caller
// supplies it.
109#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
110 const Packet4f p4f_##NAME = pset1<Packet4f>(X)
// Declares a constant `const Packet2d p2d_<NAME>` initialized with
// pset1<Packet2d>(X). The expansion has no trailing semicolon; the caller
// supplies it.
112#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
113 const Packet2d p2d_##NAME = pset1<Packet2d>(X)
// Declares a constant `const Packet4f p4f_<NAME>` initialized with
// pset1frombits<Packet4f>(X), i.e. X is passed as an integer bit pattern
// rather than as a float value. The expansion has no trailing semicolon; the
// caller supplies it.
115#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
116 const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X)
// Declares a constant `const Packet4i p4i_<NAME>` initialized with
// pset1<Packet4i>(X). The expansion has no trailing semicolon; the caller
// supplies it.
118#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
119 const Packet4i p4i_##NAME = pset1<Packet4i>(X)
124#ifndef EIGEN_VECTORIZE_AVX
126struct packet_traits<float> : default_packet_traits {
152#ifdef EIGEN_VECTORIZE_SSE4_1
159struct packet_traits<double> : default_packet_traits {
177#ifdef EIGEN_VECTORIZE_SSE4_1
244#ifndef EIGEN_VECTORIZE_AVX
// Specialization of scalar_div_cost for <float, true>; exposes the constant
// value 7. It follows the `#ifndef EIGEN_VECTORIZE_AVX` directive directly
// above it.
// NOTE(review): presumably the cost estimate for a vectorized float division
// and the `true` argument selects the vectorized case -- confirm against the
// primary template.
245template<>
struct scalar_div_cost<float,true> {
enum {
value = 7 }; };
// Specialization of scalar_div_cost for <double, true>; exposes the constant
// value 8 (one more than the <float, true> specialization).
// NOTE(review): presumably the cost estimate for a vectorized double division
// -- confirm against the primary template.
246template<>
struct scalar_div_cost<double,true> {
enum {
value = 8 }; };
249#if EIGEN_COMP_MSVC==1500
279#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
303#ifdef EIGEN_VECTORIZE_SSE3
304 return _mm_addsub_ps(a,
b);
306 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x0,0x80000000,0x0));
314#ifdef EIGEN_VECTORIZE_SSE3
315 return _mm_addsub_pd(a,
b);
317 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x0));
324 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
325 return _mm_xor_ps(a,mask);
329 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
330 return _mm_xor_pd(a,mask);
350#ifdef EIGEN_VECTORIZE_SSE4_1
351 return _mm_mullo_epi32(a,
b);
371#ifdef EIGEN_VECTORIZE_FMA
376#ifdef EIGEN_VECTORIZE_SSE4_1
378 return _mm_blendv_ps(
b,a,mask);
382 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(
b),_mm_castsi128_ps(a),_mm_castsi128_ps(mask)));
388 return _mm_blendv_epi8(
b,a,mask);
392 Packet16b a_part = _mm_and_si128(mask, a);
394 return _mm_or_si128(a_part, b_part);
403 return _mm_castsi128_ps(_mm_cmpeq_epi32(
b,
b));
408 return _mm_castsi128_pd(_mm_cmpeq_epi32(
b,
b));
447#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
452 #ifdef EIGEN_VECTORIZE_AVX
454 asm(
"vminps %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
457 asm(
"minps %[a], %[res]" : [res]
"+x" (res) : [a]
"x" (a));
462 return _mm_min_ps(
b, a);
466#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
471 #ifdef EIGEN_VECTORIZE_AVX
473 asm(
"vminpd %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
476 asm(
"minpd %[a], %[res]" : [res]
"+x" (res) : [a]
"x" (a));
481 return _mm_min_pd(
b, a);
486#ifdef EIGEN_VECTORIZE_SSE4_1
487 return _mm_min_epi32(a,
b);
491 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,
b));
497#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
502 #ifdef EIGEN_VECTORIZE_AVX
504 asm(
"vmaxps %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
507 asm(
"maxps %[a], %[res]" : [res]
"+x" (res) : [a]
"x" (a));
512 return _mm_max_ps(
b, a);
516#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
521 #ifdef EIGEN_VECTORIZE_AVX
523 asm(
"vmaxpd %[a], %[b], %[res]" : [res]
"=x" (res) : [a]
"x" (a), [
b]
"x" (
b));
526 asm(
"maxpd %[a], %[res]" : [res]
"+x" (res) : [a]
"x" (a));
531 return _mm_max_pd(
b, a);
536#ifdef EIGEN_VECTORIZE_SSE4_1
537 return _mm_max_epi32(a,
b);
541 return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,
b));
545template <
typename Packet,
typename Op>
549 Packet not_nan_mask_a =
pcmp_eq(a, a);
551 return pselect<Packet>(not_nan_mask_a, m,
b);
554template <
typename Packet,
typename Op>
558 Packet not_nan_mask_a =
pcmp_eq(a, a);
560 return pselect<Packet>(not_nan_mask_a, m, a);
603 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
604 return _mm_and_ps(a,mask);
608 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
609 return _mm_and_pd(a,mask);
613 #ifdef EIGEN_VECTORIZE_SSSE3
614 return _mm_abs_epi32(a);
616 Packet4i aux = _mm_srai_epi32(a,31);
617 return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
621#ifdef EIGEN_VECTORIZE_SSE4_1
627 return _mm_round_ps(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
632 const Packet2d mask = _mm_castsi128_pd(_mm_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull));
633 const Packet2d prev0dot5 = _mm_castsi128_pd(_mm_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull));
634 return _mm_round_pd(
padd(
por(
pand(a, mask), prev0dot5), a), _MM_FROUND_TO_ZERO);
679 Packet4f mask = _mm_cmpgt_ps(tmp, a);
680 mask =
pand(mask, cst_1);
681 return psub(tmp, mask);
689 Packet2d mask = _mm_cmpgt_pd(tmp, a);
690 mask =
pand(mask, cst_1);
691 return psub(tmp, mask);
699 Packet4f mask = _mm_cmplt_ps(tmp, a);
700 mask =
pand(mask, cst_1);
701 return padd(tmp, mask);
709 Packet2d mask = _mm_cmplt_pd(tmp, a);
710 mask =
pand(mask, cst_1);
711 return padd(tmp, mask);
723 #if (EIGEN_COMP_MSVC==1600)
726 __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (
const __m64*)(
from));
727 res = _mm_loadh_pi(res, (
const __m64*)(
from+2));
730 return _mm_loadu_ps(
from);
739 return _mm_loadu_ps(
from);
746 return _mm_loadu_pd(
from);
751 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
from));
755 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
from));
761 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(
reinterpret_cast<const double*
>(
from))), 0, 0, 1, 1);
768 tmp = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(
from));
776 __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(
reinterpret_cast<const double*
>(
from)));
777 return _mm_unpacklo_epi8(tmp, tmp);
784 __m128i tmp = _mm_castps_si128(pload1<Packet4f>(
reinterpret_cast<const float*
>(
from)));
785 tmp = _mm_unpacklo_epi8(tmp, tmp);
786 return _mm_unpacklo_epi16(tmp, tmp);
801 return _mm_set_ps(
from[3*stride],
from[2*stride],
from[1*stride],
from[0*stride]);
805 return _mm_set_pd(
from[1*stride],
from[0*stride]);
809 return _mm_set_epi32(
from[3*stride],
from[2*stride],
from[1*stride],
from[0*stride]);
814 return _mm_set_epi8(
from[15*stride],
from[14*stride],
from[13*stride],
from[12*stride],
822 to[stride*0] = _mm_cvtss_f32(
from);
823 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(
from,
from, 1));
824 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(
from,
from, 2));
825 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(
from,
from, 3));
829 to[stride*0] = _mm_cvtsd_f64(
from);
830 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(
from,
from, 1));
834 to[stride*0] = _mm_cvtsi128_si32(
from);
835 to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 1));
836 to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 2));
837 to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 3));
841 to[4*stride*0] = _mm_cvtsi128_si32(
from);
842 to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 1));
843 to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 2));
844 to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(
from, 3));
861#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
867#ifndef EIGEN_VECTORIZE_AVX
873#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
879#elif EIGEN_COMP_MSVC_STRICT
895#ifdef EIGEN_VECTORIZE_SSSE3
896 __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
897 return _mm_shuffle_epi8(a, mask);
899 Packet16b tmp = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3));
900 tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
901 return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
914 __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(
pand(a, cst_exp_mask)), 52);
937 const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
938 Packet4i b = parithmetic_shift_right<2>(ei);
942 c = _mm_castsi128_pd(_mm_slli_epi64(
padd(
b, bias), 52));
963#ifdef EIGEN_VECTORIZE_SSE3
964 a0 = _mm_loaddup_pd(a+0);
965 a1 = _mm_loaddup_pd(a+1);
966 a2 = _mm_loaddup_pd(a+2);
967 a3 = _mm_loaddup_pd(a+3);
981 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
982 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
983 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
984 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
995 Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
1011#ifdef EIGEN_VECTORIZE_SSSE3
1014 Packet4i tmp0 = _mm_hadd_epi32(a,a);
1021 Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
1027 Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
1037 Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
1051 return (aux[0] * aux[1]) * (aux[2] * aux[3]);
1055 Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a,a));
1063 Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
1072#ifdef EIGEN_VECTORIZE_SSE4_1
1073 Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
1080 int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
1081 int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
1082 return aux0<aux2 ? aux0 : aux2;
1089 Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
1098#ifdef EIGEN_VECTORIZE_SSE4_1
1099 Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
1106 int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
1107 int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
1108 return aux0>aux2 ? aux0 : aux2;
1120 return _mm_movemask_ps(x) != 0x0;
1130 __m128d tmp = _mm_unpackhi_pd(kernel.
packet[0], kernel.
packet[1]);
1137 __m128i T0 = _mm_unpacklo_epi32(kernel.
packet[0], kernel.
packet[1]);
1138 __m128i T1 = _mm_unpacklo_epi32(kernel.
packet[2], kernel.
packet[3]);
1139 __m128i T2 = _mm_unpackhi_epi32(kernel.
packet[0], kernel.
packet[1]);
1140 __m128i T3 = _mm_unpackhi_epi32(kernel.
packet[2], kernel.
packet[3]);
1142 kernel.
packet[0] = _mm_unpacklo_epi64(T0, T1);
1143 kernel.
packet[1] = _mm_unpackhi_epi64(T0, T1);
1144 kernel.
packet[2] = _mm_unpacklo_epi64(T2, T3);
1145 kernel.
packet[3] = _mm_unpackhi_epi64(T2, T3);
1150 __m128i T0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1151 __m128i T1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1152 __m128i T2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1153 __m128i T3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1154 kernel.
packet[0] = _mm_unpacklo_epi16(T0, T2);
1155 kernel.
packet[1] = _mm_unpackhi_epi16(T0, T2);
1156 kernel.
packet[2] = _mm_unpacklo_epi16(T1, T3);
1157 kernel.
packet[3] = _mm_unpackhi_epi16(T1, T3);
1173 __m128i t0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1174 __m128i t1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1175 __m128i t2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1176 __m128i t3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1177 __m128i t4 = _mm_unpacklo_epi8(kernel.
packet[4], kernel.
packet[5]);
1178 __m128i t5 = _mm_unpackhi_epi8(kernel.
packet[4], kernel.
packet[5]);
1179 __m128i t6 = _mm_unpacklo_epi8(kernel.
packet[6], kernel.
packet[7]);
1180 __m128i t7 = _mm_unpackhi_epi8(kernel.
packet[6], kernel.
packet[7]);
1181 __m128i t8 = _mm_unpacklo_epi8(kernel.
packet[8], kernel.
packet[9]);
1182 __m128i t9 = _mm_unpackhi_epi8(kernel.
packet[8], kernel.
packet[9]);
1183 __m128i ta = _mm_unpacklo_epi8(kernel.
packet[10], kernel.
packet[11]);
1184 __m128i tb = _mm_unpackhi_epi8(kernel.
packet[10], kernel.
packet[11]);
1185 __m128i tc = _mm_unpacklo_epi8(kernel.
packet[12], kernel.
packet[13]);
1186 __m128i td = _mm_unpackhi_epi8(kernel.
packet[12], kernel.
packet[13]);
1187 __m128i te = _mm_unpacklo_epi8(kernel.
packet[14], kernel.
packet[15]);
1188 __m128i tf = _mm_unpackhi_epi8(kernel.
packet[14], kernel.
packet[15]);
1190 __m128i s0 = _mm_unpacklo_epi16(t0, t2);
1191 __m128i s1 = _mm_unpackhi_epi16(t0, t2);
1192 __m128i s2 = _mm_unpacklo_epi16(t1, t3);
1193 __m128i s3 = _mm_unpackhi_epi16(t1, t3);
1194 __m128i s4 = _mm_unpacklo_epi16(t4, t6);
1195 __m128i s5 = _mm_unpackhi_epi16(t4, t6);
1196 __m128i s6 = _mm_unpacklo_epi16(t5, t7);
1197 __m128i s7 = _mm_unpackhi_epi16(t5, t7);
1198 __m128i s8 = _mm_unpacklo_epi16(t8, ta);
1199 __m128i s9 = _mm_unpackhi_epi16(t8, ta);
1200 __m128i sa = _mm_unpacklo_epi16(t9, tb);
1201 __m128i sb = _mm_unpackhi_epi16(t9, tb);
1202 __m128i sc = _mm_unpacklo_epi16(tc, te);
1203 __m128i sd = _mm_unpackhi_epi16(tc, te);
1204 __m128i se = _mm_unpacklo_epi16(td, tf);
1205 __m128i sf = _mm_unpackhi_epi16(td, tf);
1207 __m128i u0 = _mm_unpacklo_epi32(s0, s4);
1208 __m128i u1 = _mm_unpackhi_epi32(s0, s4);
1209 __m128i u2 = _mm_unpacklo_epi32(s1, s5);
1210 __m128i u3 = _mm_unpackhi_epi32(s1, s5);
1211 __m128i u4 = _mm_unpacklo_epi32(s2, s6);
1212 __m128i u5 = _mm_unpackhi_epi32(s2, s6);
1213 __m128i u6 = _mm_unpacklo_epi32(s3, s7);
1214 __m128i u7 = _mm_unpackhi_epi32(s3, s7);
1215 __m128i u8 = _mm_unpacklo_epi32(s8, sc);
1216 __m128i u9 = _mm_unpackhi_epi32(s8, sc);
1217 __m128i ua = _mm_unpacklo_epi32(s9, sd);
1218 __m128i ub = _mm_unpackhi_epi32(s9, sd);
1219 __m128i uc = _mm_unpacklo_epi32(sa, se);
1220 __m128i ud = _mm_unpackhi_epi32(sa, se);
1221 __m128i ue = _mm_unpacklo_epi32(sb, sf);
1222 __m128i uf = _mm_unpackhi_epi32(sb, sf);
1224 kernel.
packet[0] = _mm_unpacklo_epi64(u0, u8);
1225 kernel.
packet[1] = _mm_unpackhi_epi64(u0, u8);
1226 kernel.
packet[2] = _mm_unpacklo_epi64(u1, u9);
1227 kernel.
packet[3] = _mm_unpackhi_epi64(u1, u9);
1228 kernel.
packet[4] = _mm_unpacklo_epi64(u2, ua);
1229 kernel.
packet[5] = _mm_unpackhi_epi64(u2, ua);
1230 kernel.
packet[6] = _mm_unpacklo_epi64(u3, ub);
1231 kernel.
packet[7] = _mm_unpackhi_epi64(u3, ub);
1232 kernel.
packet[8] = _mm_unpacklo_epi64(u4, uc);
1233 kernel.
packet[9] = _mm_unpackhi_epi64(u4, uc);
1234 kernel.
packet[10] = _mm_unpacklo_epi64(u5, ud);
1235 kernel.
packet[11] = _mm_unpackhi_epi64(u5, ud);
1236 kernel.
packet[12] = _mm_unpacklo_epi64(u6, ue);
1237 kernel.
packet[13] = _mm_unpackhi_epi64(u6, ue);
1238 kernel.
packet[14] = _mm_unpacklo_epi64(u7, uf);
1239 kernel.
packet[15] = _mm_unpackhi_epi64(u7, uf);
1243 const __m128i zero = _mm_setzero_si128();
1244 const __m128i select = _mm_set_epi32(ifPacket.
select[3], ifPacket.
select[2], ifPacket.
select[1], ifPacket.
select[0]);
1245 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
1246#ifdef EIGEN_VECTORIZE_SSE4_1
1247 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
1249 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
1253 const __m128 zero = _mm_setzero_ps();
1255 __m128 false_mask = _mm_cmpeq_ps(select, zero);
1256#ifdef EIGEN_VECTORIZE_SSE4_1
1257 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
1259 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
1263 const __m128d zero = _mm_setzero_pd();
1264 const __m128d select = _mm_set_pd(ifPacket.
select[1], ifPacket.
select[0]);
1265 __m128d false_mask = _mm_cmpeq_pd(select, zero);
1266#ifdef EIGEN_VECTORIZE_SSE4_1
1267 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
1269 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
1274#ifdef EIGEN_VECTORIZE_FMA
1276 return ::fmaf(a,
b,
c);
// Specialization that sets is_arithmetic<Packet4h>::value to true, marking
// the Packet4h packet type as arithmetic for code that queries this trait.
1294template<>
struct is_arithmetic<Packet4h> {
enum {
value =
true }; };
1297struct packet_traits<
Eigen::half> : default_packet_traits {
1298 typedef Packet4h
type;
1300 typedef Packet4h
half;
1341 __int64_t a64 = _mm_cvtm64_si64(a.x);
1342 __int64_t b64 = _mm_cvtm64_si64(
b.x);
1359 result.x = _mm_set_pi16(
h[3].x,
h[2].x,
h[1].x,
h[0].x);
1364 __int64_t a64 = _mm_cvtm64_si64(a.x);
1365 __int64_t b64 = _mm_cvtm64_si64(
b.x);
1382 result.x = _mm_set_pi16(
h[3].x,
h[2].x,
h[1].x,
h[0].x);
1387 __int64_t a64 = _mm_cvtm64_si64(a.x);
1388 __int64_t b64 = _mm_cvtm64_si64(
b.x);
1405 result.x = _mm_set_pi16(
h[3].x,
h[2].x,
h[1].x,
h[0].x);
1410 __int64_t a64 = _mm_cvtm64_si64(a.x);
1411 __int64_t b64 = _mm_cvtm64_si64(
b.x);
1428 result.x = _mm_set_pi16(
h[3].x,
h[2].x,
h[1].x,
h[0].x);
1434 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(
from));
1440 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(
from));
1445 __int64_t r = _mm_cvtm64_si64(
from.x);
1446 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1450 __int64_t r = _mm_cvtm64_si64(
from.x);
1451 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1456 return pset1<Packet4h>(*
from);
1468 __int64_t a = _mm_cvtm64_si64(
from.x);
1469 to[stride*0].
x =
static_cast<unsigned short>(a);
1470 to[stride*1].
x =
static_cast<unsigned short>(a >> 16);
1471 to[stride*2].
x =
static_cast<unsigned short>(a >> 32);
1472 to[stride*3].
x =
static_cast<unsigned short>(a >> 48);
1477 __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);
1478 __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);
1479 __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);
1480 __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);
1482 kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1);
1483 kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);
1484 kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);
1485 kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);
1495#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
// Replacement for the _mm_castpd_ps intrinsic, listed after the
// `#if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900` directive: returns the bits of
// the __m128d argument viewed as __m128 by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1497static inline __m128 _mm_castpd_ps (__m128d x) {
return reinterpret_cast<__m128&
>(x); }
// Replacement for the _mm_castpd_si128 intrinsic: returns the bits of the
// __m128d argument viewed as __m128i by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1498static inline __m128i _mm_castpd_si128(__m128d x) {
return reinterpret_cast<__m128i&
>(x); }
// Replacement for the _mm_castps_pd intrinsic: returns the bits of the
// __m128 argument viewed as __m128d by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1499static inline __m128d _mm_castps_pd (__m128 x) {
return reinterpret_cast<__m128d&
>(x); }
// Replacement for the _mm_castps_si128 intrinsic: returns the bits of the
// __m128 argument viewed as __m128i by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1500static inline __m128i _mm_castps_si128(__m128 x) {
return reinterpret_cast<__m128i&
>(x); }
// Replacement for the _mm_castsi128_ps intrinsic: returns the bits of the
// __m128i argument viewed as __m128 by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1501static inline __m128 _mm_castsi128_ps(__m128i x) {
return reinterpret_cast<__m128&
>(x); }
// Replacement for the _mm_castsi128_pd intrinsic: returns the bits of the
// __m128i argument viewed as __m128d by reinterpret_cast-ing a reference to
// the by-value parameter. No value conversion is performed.
1502static inline __m128d _mm_castsi128_pd(__m128i x) {
return reinterpret_cast<__m128d&
>(x); }
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:35
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:27
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:39
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:31
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:986
#define EIGEN_FAST_MATH
Allows disabling some optimizations that might affect the accuracy of the result.
Definition: Macros.h:49
#define EIGEN_STRONG_INLINE
Definition: Macros.h:927
#define EIGEN_OPTIMIZATION_BARRIER(X)
Definition: Macros.h:1154
#define vec2d_swizzle1(v, p, q)
Definition: PacketMath.h:67
#define vec4i_swizzle1(v, p, q, r, s)
Definition: PacketMath.h:64
#define vec4i_swizzle2(a, b, p, q, r, s)
Definition: PacketMath.h:73
and restrictions which apply to each piece of software is included later in this file and or inside of the individual applicable source files The disclaimer of warranty in the WPILib license above applies to all code in and nothing in any of the other licenses gives permission to use the names of FIRST nor the names of the WPILib contributors to endorse or promote products derived from this software The following pieces of software have additional or alternate and or Google Inc All rights reserved Redistribution and use in source and binary with or without are permitted provided that the following conditions are this list of conditions and the following disclaimer *Redistributions in binary form must reproduce the above copyright this list of conditions and the following disclaimer in the documentation and or other materials provided with the distribution *Neither the name of Google Inc nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED BUT NOT LIMITED THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY OR CONSEQUENTIAL WHETHER IN STRICT OR EVEN IF ADVISED OF THE POSSIBILITY OF SUCH January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source 
documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the whether in Source or Object that is based or other modifications as a an original work of authorship For the purposes of this Derivative Works shall not include works that remain separable from
Definition: ThirdPartyNotices.txt:128
auto fma(const UnitTypeLhs x, const UnitMultiply y, const UnitAdd z) noexcept -> decltype(x *y)
Multiply-add.
Definition: math.h:740
@ Aligned16
Data pointer is aligned on a 16 bytes boundary.
Definition: Constants.h:235
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:495
EIGEN_STRONG_INLINE Packet8f pcmp_lt_or_nan(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:347
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:427
EIGEN_STRONG_INLINE Packet8f pblend(const Selector< 8 > &ifPacket, const Packet8f &thenPacket, const Packet8f &elsePacket)
Definition: PacketMath.h:921
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: PacketMath.h:627
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:546
EIGEN_DEVICE_FUNC void pscatter< bool, Packet16b >(bool *to, const Packet16b &from, Index stride)
Definition: PacketMath.h:839
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1000
EIGEN_STRONG_INLINE void pstore< bool >(bool *to, const Packet16b &from)
Definition: PacketMath.h:792
EIGEN_STRONG_INLINE bool predux< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:1026
__m128d Packet2d
Definition: PacketMath.h:43
EIGEN_STRONG_INLINE Packet8f pcmp_eq(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:348
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
EIGEN_DEVICE_FUNC void ptranspose(PacketBlock< Packet8f, 8 > &kernel)
Definition: PacketMath.h:863
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:446
EIGEN_STRONG_INLINE Packet16b ploaddup< Packet16b >(const bool *from)
Definition: PacketMath.h:774
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:290
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:428
EIGEN_STRONG_INLINE Packet16b pmul< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:364
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: PacketMath.h:247
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet8i &from)
Definition: PacketMath.h:624
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:76
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:585
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:289
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:419
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:577
__m128 Packet4f
Definition: PacketMath.h:42
EIGEN_STRONG_INLINE Packet8h pselect(const Packet8h &mask, const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1057
EIGEN_STRONG_INLINE Packet8i plogical_shift_left(Packet8i a)
Definition: PacketMath.h:567
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: PacketMath.h:259
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:312
EIGEN_STRONG_INLINE Packet16b ploadquad< Packet16b >(const bool *from)
Definition: PacketMath.h:783
EIGEN_STRONG_INLINE Packet16b psub< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:298
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:102
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:885
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:413
EIGEN_STRONG_INLINE Packet16b por< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:420
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: PacketMath.h:765
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1087
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet8f &from)
Definition: PacketMath.h:622
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: PacketMath.h:763
EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i &a, const Packet4i &b, const Packet4i &c)
Definition: PacketMath.h:370
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:80
EIGEN_STRONG_INLINE Packet16b pload< Packet16b >(const bool *from)
Definition: PacketMath.h:718
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:418
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: PacketMath.h:928
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:573
eigen_packet_wrapper< __m128i, 1 > Packet16b
Definition: PacketMath.h:47
EIGEN_STRONG_INLINE void pstore1< Packet2d >(double *to, const double &a)
Definition: PacketMath.h:855
EIGEN_DEVICE_FUNC void pscatter< int, Packet4i >(int *to, const Packet4i &from, Index stride)
Definition: PacketMath.h:832
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: PacketMath.h:759
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:417
EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f &a)
Definition: PacketMath.h:725
EIGEN_STRONG_INLINE Packet16b pset1< Packet16b >(const bool &from)
Definition: PacketMath.h:261
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1070
EIGEN_STRONG_INLINE Packet8f pcmp_le(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:345
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:424
EIGEN_STRONG_INLINE Packet4f print(const Packet4f &a)
Definition: PacketMath.h:646
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1092
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:346
EIGEN_STRONG_INLINE void pstore1< Packet4f >(float *to, const float &a)
Definition: PacketMath.h:849
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
EIGEN_DEVICE_FUNC Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: PacketMath.h:799
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:301
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: PacketMath.h:258
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:555
EIGEN_STRONG_INLINE void punpackp(Packet4f *vecs)
Definition: PacketMath.h:979
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: PacketMath.h:715
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1044
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1042
EIGEN_STRONG_INLINE Packet2d ptrue< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:406
EIGEN_STRONG_INLINE Packet8i plogical_shift_right(Packet8i a)
Definition: PacketMath.h:557
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:593
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:581
EIGEN_STRONG_INLINE Packet8f pcmp_lt(const Packet8f &a, const Packet8f &b)
Definition: PacketMath.h:346
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: PacketMath.h:623
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:291
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:674
EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f &a)
Definition: PacketMath.h:295
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:429
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet8f &from)
Definition: PacketMath.h:626
EIGEN_STRONG_INLINE Packet16b pand< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:415
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:366
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: PacketMath.h:716
EIGEN_STRONG_INLINE Packet16b ploadu< Packet16b >(const bool *from)
Definition: PacketMath.h:753
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:347
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
Definition: PacketMath.h:905
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1035
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:569
EIGEN_STRONG_INLINE Packet16b ptrue< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:399
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: PacketMath.h:691
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Definition: PacketMath.h:743
EIGEN_DEVICE_FUNC void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: PacketMath.h:827
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:589
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: PacketMath.h:748
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1040
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:367
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1066
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: PacketMath.h:257
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:297
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:565
EIGEN_DEVICE_FUNC void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: PacketMath.h:820
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: PacketMath.h:286
EIGEN_STRONG_INLINE bool pfirst< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:889
EIGEN_STRONG_INLINE void pstoreu< bool >(bool *to, const Packet16b &from)
Definition: PacketMath.h:797
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:694
const char * SsePrefetchPtrType
Definition: PacketMath.h:864
EIGEN_STRONG_INLINE Packet8i parithmetic_shift_right(Packet8i a)
Definition: PacketMath.h:547
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
Definition: PacketMath.h:252
EIGEN_STRONG_INLINE void pbroadcast4< Packet2d >(const double *a, Packet2d &a0, Packet2d &a1, Packet2d &a2, Packet2d &a3)
Definition: PacketMath.h:960
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
Definition: PacketMath.h:263
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:696
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
Definition: PacketMath.h:922
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: PacketMath.h:743
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:88
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:422
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:484
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
Definition: PacketMath.h:918
EIGEN_DEVICE_FUNC Packet16b pgather< bool, Packet16b >(const bool *from, Index stride)
Definition: PacketMath.h:812
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: PacketMath.h:113
EIGEN_STRONG_INLINE bool predux_any(const Packet8f &x)
Definition: PacketMath.h:857
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Default implementation of pldexp.
Definition: GenericPacketMathFunctions.h:85
EIGEN_STRONG_INLINE Packet4f ptrue< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:401
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:84
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &, const Packet2d &)
Definition: PacketMath.h:423
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1050
EIGEN_STRONG_INLINE Packet4i ptrue< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:398
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1047
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:887
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: PacketMath.h:287
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:98
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:987
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet8i &from)
Definition: PacketMath.h:628
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:704
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: PacketMath.h:736
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:348
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type pfirst(const Packet &a)
Definition: GenericPacketMath.h:844
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:414
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:465
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1019
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Default implementation of pfrexp.
Definition: GenericPacketMathFunctions.h:40
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:412
EIGEN_STRONG_INLINE Packet16b pxor< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:425
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:222
EIGEN_DEVICE_FUNC Packet4i pgather< int, Packet4i >(const int *from, Index stride)
Definition: PacketMath.h:807
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1096
EIGEN_DEVICE_FUNC Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: PacketMath.h:803
EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f &a)
Definition: PacketMath.h:707
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:534
EIGEN_STRONG_INLINE void pbroadcast4< Packet4f >(const float *a, Packet4f &a0, Packet4f &a1, Packet4f &a2, Packet4f &a3)
Definition: PacketMath.h:950
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
Definition: PacketMath.h:264
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: PacketMath.h:717
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:296
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:295
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: PacketMath.h:285
EIGEN_STRONG_INLINE Packet16b padd< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:293
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:515
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
Definition: PacketMath.h:693
EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f &a)
Definition: PacketMath.h:286
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:886
EIGEN_STRONG_INLINE bool predux_mul< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:1054
eigen_packet_wrapper< __m128i, 0 > Packet4i
Definition: PacketMath.h:46
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:684
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1061
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: PacketMath.h:692
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:496
::uint64_t uint64_t
Definition: Meta.h:58
Namespace containing all symbols from the Eigen library.
Definition: Core:141
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
result
Definition: format.h:2564
Definition: Eigen_Colamd.h:50
static constexpr const unit_t< compound_unit< energy::joule, time::seconds > > h(6.626070040e-34)
Planck constant.
static constexpr const charge::coulomb_t e(1.6021766208e-19)
elementary charge.
static constexpr const velocity::meters_per_second_t c(299792458.0)
Speed of light in vacuum.
numext::uint16_t x
Definition: Half.h:104
Definition: GenericPacketMath.h:1014
Packet packet[N]
Definition: GenericPacketMath.h:1015
Definition: GenericPacketMath.h:1027
bool select[N]
Definition: GenericPacketMath.h:1028
Definition: GenericPacketMath.h:43
@ HasRsqrt
Definition: GenericPacketMath.h:67
@ HasSin
Definition: GenericPacketMath.h:75
@ HasBlend
Definition: GenericPacketMath.h:60
@ HasNdtri
Definition: GenericPacketMath.h:90
@ HasCos
Definition: GenericPacketMath.h:76
@ HasCmp
Definition: GenericPacketMath.h:63
@ HasShift
Definition: GenericPacketMath.h:49
@ HasLog1p
Definition: GenericPacketMath.h:71
@ HasCeil
Definition: GenericPacketMath.h:101
@ HasExp
Definition: GenericPacketMath.h:68
@ HasRound
Definition: GenericPacketMath.h:98
@ HasRint
Definition: GenericPacketMath.h:99
@ HasSqrt
Definition: GenericPacketMath.h:66
@ HasErf
Definition: GenericPacketMath.h:88
@ HasBessel
Definition: GenericPacketMath.h:91
@ HasExpm1
Definition: GenericPacketMath.h:69
@ HasLog
Definition: GenericPacketMath.h:70
@ HasTanh
Definition: GenericPacketMath.h:83
@ HasFloor
Definition: GenericPacketMath.h:100
@ HasDiv
Definition: GenericPacketMath.h:65
Definition: GenericPacketMath.h:160
Packet16b half
Definition: PacketMath.h:201
Packet16b type
Definition: PacketMath.h:200
Packet2d half
Definition: PacketMath.h:161
Packet2d type
Definition: PacketMath.h:160
Packet4f type
Definition: PacketMath.h:127
Packet4f half
Definition: PacketMath.h:128
Packet4i type
Definition: PacketMath.h:186
Packet4i half
Definition: PacketMath.h:187
Definition: GenericPacketMath.h:107
T type
Definition: GenericPacketMath.h:108
@ HasHalfPacket
Definition: GenericPacketMath.h:114
@ size
Definition: GenericPacketMath.h:112
@ AlignedOnScalar
Definition: GenericPacketMath.h:113
@ Vectorizable
Definition: GenericPacketMath.h:111
T half
Definition: GenericPacketMath.h:109
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
Definition: PacketMath.h:56
@ mask
Definition: PacketMath.h:57
Packet16b half
Definition: PacketMath.h:240
bool type
Definition: PacketMath.h:239
double type
Definition: PacketMath.h:229
Packet2d half
Definition: PacketMath.h:230
Packet4i integer_packet
Definition: PacketMath.h:225
Packet4f half
Definition: PacketMath.h:224
float type
Definition: PacketMath.h:223
int type
Definition: PacketMath.h:234
Packet4i half
Definition: PacketMath.h:235
Definition: GenericPacketMath.h:133
T type
Definition: GenericPacketMath.h:134
T half
Definition: GenericPacketMath.h:135
@ masked_load_available
Definition: GenericPacketMath.h:141
@ size
Definition: GenericPacketMath.h:138
@ masked_store_available
Definition: GenericPacketMath.h:142
@ vectorizable
Definition: GenericPacketMath.h:140
@ alignment
Definition: GenericPacketMath.h:139