WPILibC++ 2023.4.3
Redux.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19// * implement other kind of vectorization
20// * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Evaluator>
28{
29public:
31 enum {
33 InnerMaxSize = int(Evaluator::IsRowMajor)
34 ? Evaluator::MaxColsAtCompileTime
35 : Evaluator::MaxRowsAtCompileTime,
36 OuterMaxSize = int(Evaluator::IsRowMajor)
37 ? Evaluator::MaxRowsAtCompileTime
38 : Evaluator::MaxColsAtCompileTime,
40 : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
41 : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
42 };
43
44 enum {
45 MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
47 MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
49 };
50
51public:
52 enum {
55 : int(DefaultTraversal)
56 };
57
58public:
59 enum {
60 Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
61 : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
63 };
64
65public:
66 enum {
68 };
69
70#ifdef EIGEN_DEBUG_ASSIGN
71 static void debug()
72 {
73 std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
74 std::cerr.setf(std::ios::hex, std::ios::basefield);
75 EIGEN_DEBUG_VAR(Evaluator::Flags)
76 std::cerr.unsetf(std::ios::hex);
84 std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
86 std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
87 std::cerr << std::endl;
88 }
89#endif
90};
91
92/***************************************************************************
93* Part 2 : unrollers
94***************************************************************************/
95
96/*** no vectorization ***/
97
98template<typename Func, typename Evaluator, int Start, int Length>
100{
101 enum {
102 HalfLength = Length/2
103 };
104
105 typedef typename Evaluator::Scalar Scalar;
106
108 static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
109 {
112 }
113};
114
115template<typename Func, typename Evaluator, int Start>
116struct redux_novec_unroller<Func, Evaluator, Start, 1>
117{
118 enum {
119 outer = Start / Evaluator::InnerSizeAtCompileTime,
120 inner = Start % Evaluator::InnerSizeAtCompileTime
121 };
122
123 typedef typename Evaluator::Scalar Scalar;
124
126 static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
127 {
128 return eval.coeffByOuterInner(outer, inner);
129 }
130};
131
132// This is actually dead code and will never be called. It is required
133// to prevent false warnings regarding failed inlining though
134// for 0 length run() will never be called at all.
135template<typename Func, typename Evaluator, int Start>
136struct redux_novec_unroller<Func, Evaluator, Start, 0>
137{
138 typedef typename Evaluator::Scalar Scalar;
140 static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
141};
142
143/*** vectorization ***/
144
145template<typename Func, typename Evaluator, int Start, int Length>
147{
148 template<typename PacketType>
150 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
151 {
152 enum {
154 HalfLength = Length/2
155 };
156
157 return func.packetOp(
160 }
161};
162
163template<typename Func, typename Evaluator, int Start>
164struct redux_vec_unroller<Func, Evaluator, Start, 1>
165{
166 template<typename PacketType>
168 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
169 {
170 enum {
172 index = Start * PacketSize,
173 outer = index / int(Evaluator::InnerSizeAtCompileTime),
174 inner = index % int(Evaluator::InnerSizeAtCompileTime),
175 alignment = Evaluator::Alignment
176 };
177 return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
178 }
179};
180
181/***************************************************************************
182* Part 3 : implementation of all cases
183***************************************************************************/
184
185template<typename Func, typename Evaluator,
187 int Unrolling = redux_traits<Func, Evaluator>::Unrolling
188>
190
191template<typename Func, typename Evaluator>
192struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
193{
194 typedef typename Evaluator::Scalar Scalar;
195
196 template<typename XprType>
198 Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
199 {
200 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
201 Scalar res;
202 res = eval.coeffByOuterInner(0, 0);
203 for(Index i = 1; i < xpr.innerSize(); ++i)
204 res = func(res, eval.coeffByOuterInner(0, i));
205 for(Index i = 1; i < xpr.outerSize(); ++i)
206 for(Index j = 0; j < xpr.innerSize(); ++j)
207 res = func(res, eval.coeffByOuterInner(i, j));
208 return res;
209 }
210};
211
212template<typename Func, typename Evaluator>
214 : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
215{
217 typedef typename Evaluator::Scalar Scalar;
218 template<typename XprType>
220 Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
221 {
222 return Base::run(eval,func);
223 }
224};
225
226template<typename Func, typename Evaluator>
228{
229 typedef typename Evaluator::Scalar Scalar;
231
232 template<typename XprType>
233 static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
234 {
235 const Index size = xpr.size();
236
238 const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
239 enum {
240 alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
241 alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
242 };
243 const Index alignedStart = internal::first_default_aligned(xpr);
244 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
245 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
246 const Index alignedEnd2 = alignedStart + alignedSize2;
247 const Index alignedEnd = alignedStart + alignedSize;
248 Scalar res;
249 if(alignedSize)
250 {
251 PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
252 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
253 {
254 PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
255 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
256 {
257 packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
258 packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
259 }
260
261 packet_res0 = func.packetOp(packet_res0,packet_res1);
262 if(alignedEnd>alignedEnd2)
263 packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
264 }
265 res = func.predux(packet_res0);
266
267 for(Index index = 0; index < alignedStart; ++index)
268 res = func(res,eval.coeff(index));
269
270 for(Index index = alignedEnd; index < size; ++index)
271 res = func(res,eval.coeff(index));
272 }
273 else // too small to vectorize anything.
274 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
275 {
276 res = eval.coeff(0);
277 for(Index index = 1; index < size; ++index)
278 res = func(res,eval.coeff(index));
279 }
280
281 return res;
282 }
283};
284
285// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
286template<typename Func, typename Evaluator, int Unrolling>
287struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
288{
289 typedef typename Evaluator::Scalar Scalar;
291
292 template<typename XprType>
293 EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
294 {
295 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
296 const Index innerSize = xpr.innerSize();
297 const Index outerSize = xpr.outerSize();
298 enum {
300 };
301 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
302 Scalar res;
303 if(packetedInnerSize)
304 {
305 PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
306 for(Index j=0; j<outerSize; ++j)
307 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
308 packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));
309
310 res = func.predux(packet_res);
311 for(Index j=0; j<outerSize; ++j)
312 for(Index i=packetedInnerSize; i<innerSize; ++i)
313 res = func(res, eval.coeffByOuterInner(j,i));
314 }
315 else // too small to vectorize anything.
316 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
317 {
319 }
320
321 return res;
322 }
323};
324
325template<typename Func, typename Evaluator>
327{
328 typedef typename Evaluator::Scalar Scalar;
329
331 enum {
333 Size = Evaluator::SizeAtCompileTime,
334 VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
335 };
336
337 template<typename XprType>
339 Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
340 {
342 eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
343 if (VectorizedSize > 0) {
345 if (VectorizedSize != Size)
347 return res;
348 }
349 else {
351 }
352 }
353};
354
355// evaluator adaptor
356template<typename _XprType>
357class redux_evaluator : public internal::evaluator<_XprType>
358{
360public:
361 typedef _XprType XprType;
363 explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
364
365 typedef typename XprType::Scalar Scalar;
366 typedef typename XprType::CoeffReturnType CoeffReturnType;
367 typedef typename XprType::PacketScalar PacketScalar;
368
369 enum {
370 MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
371 MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
372 // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
373 Flags = Base::Flags & ~DirectAccessBit,
374 IsRowMajor = XprType::IsRowMajor,
375 SizeAtCompileTime = XprType::SizeAtCompileTime,
376 InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
377 };
378
381 { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
382
383 template<int LoadMode, typename PacketType>
385 PacketType packetByOuterInner(Index outer, Index inner) const
386 { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
387
388};
389
390} // end namespace internal
391
392/***************************************************************************
393* Part 4 : public API
394***************************************************************************/
395
396
397/** \returns the result of a full redux operation on the whole matrix or vector using \a func
398 *
399 * The template parameter \a BinaryOp is the type of the functor \a func which must be
400 * an associative operator. Both current C++98 and C++11 functor styles are handled.
401 *
402 * \warning the matrix must be not empty, otherwise an assertion is triggered.
403 *
404 * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
405 */
406template<typename Derived>
407template<typename Func>
408EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
409DenseBase<Derived>::redux(const Func& func) const
410{
411 eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412
413 typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
414 ThisEvaluator thisEval(derived());
415
416 // The initial expression is passed to the reducer as an additional argument instead of
417 // passing it as a member of redux_evaluator to help
418 return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
419}
420
421/** \returns the minimum of all coefficients of \c *this.
422 * In case \c *this contains NaN, NaNPropagation determines the behavior:
423 * NaNPropagation == PropagateFast : undefined
424 * NaNPropagation == PropagateNaN : result is NaN
425 * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
426 * \warning the matrix must be not empty, otherwise an assertion is triggered.
427 */
428template<typename Derived>
429template<int NaNPropagation>
432{
434}
435
436/** \returns the maximum of all coefficients of \c *this.
437 * In case \c *this contains NaN, NaNPropagation determines the behavior:
438 * NaNPropagation == PropagateFast : undefined
439 * NaNPropagation == PropagateNaN : result is NaN
440 * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
441 * \warning the matrix must be not empty, otherwise an assertion is triggered.
442 */
443template<typename Derived>
444template<int NaNPropagation>
445EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
449}
451/** \returns the sum of all coefficients of \c *this
452 *
453 * If \c *this is empty, then the value 0 is returned.
454 *
455 * \sa trace(), prod(), mean()
456 */
457template<typename Derived>
460{
461 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
462 return Scalar(0);
463 return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
464}
465
466/** \returns the mean of all coefficients of *this
467*
468* \sa trace(), prod(), sum()
469*/
470template<typename Derived>
473{
474#ifdef __INTEL_COMPILER
475 #pragma warning push
476 #pragma warning ( disable : 2259 )
477#endif
478 return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
479#ifdef __INTEL_COMPILER
480 #pragma warning pop
481#endif
482}
483
484/** \returns the product of all coefficients of *this
485 *
486 * Example: \include MatrixBase_prod.cpp
487 * Output: \verbinclude MatrixBase_prod.out
488 *
489 * \sa sum(), mean(), trace()
490 */
491template<typename Derived>
494{
495 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
496 return Scalar(1);
497 return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
498}
499
500/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
501 *
502 * \c *this can be any matrix, not necessarily square.
503 *
504 * \sa diagonal(), sum()
505 */
506template<typename Derived>
509{
510 return derived().diagonal().sum();
511}
512
513} // end namespace Eigen
514
515#endif // EIGEN_REDUX_H
#define EIGEN_PLAIN_ENUM_MAX(a, b)
Definition: Macros.h:1299
#define EIGEN_DEBUG_VAR(x)
Definition: Macros.h:908
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:986
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
Definition: Macros.h:1059
#define eigen_assert(x)
Definition: Macros.h:1047
#define EIGEN_STRONG_INLINE
Definition: Macros.h:927
#define EIGEN_UNROLLING_LIMIT
Defines the maximal loop size to enable meta unrolling of loops.
Definition: Settings.h:24
internal::traits< Derived >::Scalar Scalar
The numeric type of the expression' coefficients, e.g.
Definition: DenseBase.h:66
EIGEN_DEVICE_FUNC internal::traits< Derived >::Scalar minCoeff() const
EIGEN_DEVICE_FUNC internal::traits< Derived >::Scalar maxCoeff() const
EIGEN_DEVICE_FUNC Scalar prod() const
Definition: Redux.h:493
EIGEN_DEVICE_FUNC Scalar redux(const BinaryOp &func) const
EIGEN_DEVICE_FUNC Scalar sum() const
Definition: Redux.h:459
EIGEN_DEVICE_FUNC Scalar mean() const
Definition: Redux.h:472
EIGEN_DEVICE_FUNC Scalar trace() const
Definition: Redux.h:508
Definition: Redux.h:358
XprType::Scalar Scalar
Definition: Redux.h:365
XprType::CoeffReturnType CoeffReturnType
Definition: Redux.h:366
XprType::PacketScalar PacketScalar
Definition: Redux.h:367
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE redux_evaluator(const XprType &xpr)
Definition: Redux.h:363
@ IsRowMajor
Definition: Redux.h:374
@ InnerSizeAtCompileTime
Definition: Redux.h:376
@ MaxColsAtCompileTime
Definition: Redux.h:371
@ Flags
Definition: Redux.h:373
@ SizeAtCompileTime
Definition: Redux.h:375
@ MaxRowsAtCompileTime
Definition: Redux.h:370
_XprType XprType
Definition: Redux.h:361
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetByOuterInner(Index outer, Index inner) const
Definition: Redux.h:385
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
Definition: Redux.h:380
@ Unaligned
Data pointer has no specific alignment.
Definition: Constants.h:233
const unsigned int LinearAccessBit
Short version: means the expression can be seen as 1D vector.
Definition: Constants.h:130
const unsigned int DirectAccessBit
Means that the underlying array of coefficients can be directly accessed as a plain strided array.
Definition: Constants.h:155
EIGEN_CONSTEXPR Index size(const T &x)
Definition: Meta.h:479
EIGEN_DEVICE_FUNC Index first_default_aligned(const Scalar *array, Index size)
Definition: Memory.h:497
Namespace containing all symbols from the Eigen library.
Definition: MatrixExponential.h:16
const unsigned int ActualPacketAccessBit
Definition: Constants.h:107
const int HugeCost
This value means that the cost to evaluate an expression coefficient is either very expensive or cann...
Definition: Constants.h:44
@ LinearVectorizedTraversal
Definition: Constants.h:285
@ DefaultTraversal
Definition: Constants.h:277
@ SliceVectorizedTraversal
Definition: Constants.h:288
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
@ CompleteUnrolling
Definition: Constants.h:304
@ NoUnrolling
Definition: Constants.h:299
const int Dynamic
This value means that a positive quantity (e.g., a size) is not known at compile-time,...
Definition: Constants.h:22
Definition: Eigen_Colamd.h:50
Definition: XprHelper.h:332
Definition: CoreEvaluators.h:91
find_best_packet_helper< Size, typenamepacket_traits< T >::type >::type type
Definition: XprHelper.h:208
Definition: XprHelper.h:176
Definition: GenericPacketMath.h:107
redux_novec_unroller< Func, Evaluator, 0, Evaluator::SizeAtCompileTime > Base
Definition: Redux.h:216
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func &func, const XprType &)
Definition: Redux.h:220
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func &func, const XprType &xpr)
Definition: Redux.h:198
redux_traits< Func, Evaluator >::PacketType PacketType
Definition: Redux.h:330
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func &func, const XprType &xpr)
Definition: Redux.h:339
redux_traits< Func, Evaluator >::PacketType PacketScalar
Definition: Redux.h:230
static Scalar run(const Evaluator &eval, const Func &func, const XprType &xpr)
Definition: Redux.h:233
static EIGEN_DEVICE_FUNC Scalar run(const Evaluator &eval, const Func &func, const XprType &xpr)
Definition: Redux.h:293
redux_traits< Func, Evaluator >::PacketType PacketType
Definition: Redux.h:290
Definition: Redux.h:189
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &, const Func &)
Definition: Redux.h:140
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func &)
Definition: Redux.h:126
@ HalfLength
Definition: Redux.h:102
Evaluator::Scalar Scalar
Definition: Redux.h:105
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func &func)
Definition: Redux.h:108
Definition: Redux.h:28
@ Traversal
Definition: Redux.h:53
@ SliceVectorizedWork
Definition: Redux.h:39
@ InnerMaxSize
Definition: Redux.h:33
@ PacketSize
Definition: Redux.h:32
@ OuterMaxSize
Definition: Redux.h:36
@ MayLinearVectorize
Definition: Redux.h:47
@ MaySliceVectorize
Definition: Redux.h:48
@ MightVectorize
Definition: Redux.h:45
find_best_packet< typenameEvaluator::Scalar, Evaluator::SizeAtCompileTime >::type PacketType
Definition: Redux.h:30
@ UnrollingLimit
Definition: Redux.h:62
@ Cost
Definition: Redux.h:60
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func &)
Definition: Redux.h:168
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func &func)
Definition: Redux.h:150
Definition: BinaryFunctors.h:172
Definition: BinaryFunctors.h:139
Definition: BinaryFunctors.h:71
Definition: BinaryFunctors.h:33
Definition: ForwardDeclarations.h:17
Definition: GenericPacketMath.h:133