WPILibC++ 2023.4.3
Parallelizer.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_PARALLELIZER_H
11#define EIGEN_PARALLELIZER_H
12
13#if EIGEN_HAS_CXX11_ATOMIC
14#include <atomic>
15#endif
16
17namespace Eigen {
18
19namespace internal {
20
21/** \internal */
22inline void manage_multi_threading(Action action, int* v)
23{
24 static int m_maxThreads = -1;
25 EIGEN_UNUSED_VARIABLE(m_maxThreads)
26
27 if(action==SetAction)
28 {
30 m_maxThreads = *v;
31 }
32 else if(action==GetAction)
33 {
35 #ifdef EIGEN_HAS_OPENMP
36 if(m_maxThreads>0)
37 *v = m_maxThreads;
38 else
39 *v = omp_get_max_threads();
40 #else
41 *v = 1;
42 #endif
43 }
44 else
45 {
47 }
48}
49
50}
51
52/** Must be call first when calling Eigen from multiple threads */
53inline void initParallel()
54{
55 int nbt;
57 std::ptrdiff_t l1, l2, l3;
59}
60
61/** \returns the max number of threads reserved for Eigen
62 * \sa setNbThreads */
63inline int nbThreads()
64{
65 int ret;
67 return ret;
68}
69
70/** Sets the max number of threads reserved for Eigen
71 * \sa nbThreads */
72inline void setNbThreads(int v)
73{
75}
76
77namespace internal {
78
/** \internal
  * Per-thread bookkeeping shared between the workers of a parallel GEMM.
  *
  * parallelize_gemm() allocates one instance per thread and fills in
  * \c lhs_start / \c lhs_length with the row range assigned to that thread.
  * \c sync and \c users are the cross-thread signalling fields referred to by the
  * comment below; they are consumed by the product functor, not by this file.
  *
  * \tparam Index integer type used for sizes and offsets.
  */
template<typename Index> struct GemmParallelInfo
{
  /** Starts with no block published (sync == -1), no users and an empty row range. */
  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}

  // volatile is not enough on all architectures (see bug 1572)
  // to guarantee that when thread A says to thread B that it is
  // done with packing a block, then all writes have been really
  // carried out... C++11 memory model+atomic guarantees this.
#if EIGEN_HAS_CXX11_ATOMIC
  std::atomic<Index> sync;
  std::atomic<int> users;
#else
  Index volatile sync;
  int volatile users;
#endif

  Index lhs_start;   ///< first row handled by the owning thread
  Index lhs_length;  ///< number of rows handled by the owning thread
};
98
99template<bool Condition, typename Functor, typename Index>
100void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
101{
102 // TODO when EIGEN_USE_BLAS is defined,
103 // we should still enable OMP for other scalar types
104 // Without C++11, we have to disable GEMM's parallelization on
105 // non x86 architectures because there volatile is not enough for our purpose.
106 // See bug 1572.
107#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
108 // FIXME the transpose variable is only needed to properly split
109 // the matrix product when multithreading is enabled. This is a temporary
110 // fix to support row-major destination matrices. This whole
111 // parallelizer mechanism has to be redesigned anyway.
113 EIGEN_UNUSED_VARIABLE(transpose);
114 func(0,rows, 0,cols);
115#else
116
117 // Dynamically check whether we should enable or disable OpenMP.
118 // The conditions are:
119 // - the max number of threads we can create is greater than 1
120 // - we are not already in a parallel code
121 // - the sizes are large enough
122
123 // compute the maximal number of threads from the size of the product:
124 // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
125 Index size = transpose ? rows : cols;
126 Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
127
128 // compute the maximal number of threads from the total amount of work:
129 double work = static_cast<double>(rows) * static_cast<double>(cols) *
130 static_cast<double>(depth);
131 double kMinTaskSize = 50000; // FIXME improve this heuristic.
132 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
133
134 // compute the number of threads we are going to use
135 Index threads = std::min<Index>(nbThreads(), pb_max_threads);
136
137 // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
138 // then abort multi-threading
139 // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
140 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
141 return func(0,rows, 0,cols);
142
144 func.initParallelSession(threads);
145
146 if(transpose)
147 std::swap(rows,cols);
148
150
151 #pragma omp parallel num_threads(threads)
152 {
153 Index i = omp_get_thread_num();
154 // Note that the actual number of threads might be lower than the number of request ones.
155 Index actual_threads = omp_get_num_threads();
156
157 Index blockCols = (cols / actual_threads) & ~Index(0x3);
158 Index blockRows = (rows / actual_threads);
159 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
160
161 Index r0 = i*blockRows;
162 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
163
164 Index c0 = i*blockCols;
165 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
166
167 info[i].lhs_start = r0;
168 info[i].lhs_length = actualBlockRows;
169
170 if(transpose) func(c0, actualBlockCols, 0, rows, info);
171 else func(0, rows, c0, actualBlockCols, info);
172 }
173#endif
174}
175
176} // end namespace internal
177
178} // end namespace Eigen
179
180#endif // EIGEN_PARALLELIZER_H
#define eigen_internal_assert(x)
Definition: Macros.h:1053
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:1086
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:768
and restrictions which apply to each piece of software is included later in this file and or inside of the individual applicable source files The disclaimer of warranty in the WPILib license above applies to all code in and nothing in any of the other licenses gives permission to use the names of FIRST nor the names of the WPILib contributors to endorse or promote products derived from this software The following pieces of software have additional or alternate and or Google Inc All rights reserved Redistribution and use in source and binary with or without are permitted provided that the following conditions are this list of conditions and the following disclaimer *Redistributions in binary form must reproduce the above copyright this list of conditions and the following disclaimer in the documentation and or other materials provided with the distribution *Neither the name of Google Inc nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED BUT NOT LIMITED THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY OR CONSEQUENTIAL WHETHER IN STRICT OR EVEN IF ADVISED OF THE POSSIBILITY OF SUCH January AND DISTRIBUTION Definitions License shall mean the terms and conditions for and distribution as defined by Sections through of this document Licensor shall mean the copyright owner or entity authorized by the copyright owner that is granting the License Legal Entity shall mean the union of the acting entity and all other entities that control are controlled by or are under common control with that entity For the purposes of this definition control direct or to cause the direction or management of such whether by contract or including but not limited to software source 
documentation and configuration files Object form shall mean any form resulting from mechanical transformation or translation of a Source including but not limited to compiled object generated and conversions to other media types Work shall mean the work of whether in Source or Object made available under the as indicated by a copyright notice that is included in or attached to the work(an example is provided in the Appendix below). "Derivative Works" shall mean any work
void parallelize_gemm(const Functor &func, Index rows, Index cols, Index depth, bool transpose)
Definition: Parallelizer.h:100
void manage_multi_threading(Action action, int *v)
Definition: Parallelizer.h:22
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
Definition: GeneralBlockPanelKernel.h:86
EIGEN_CONSTEXPR Index size(const T &x)
Definition: Meta.h:479
Namespace containing all symbols from the Eigen library.
Definition: MatrixExponential.h:16
Action
Definition: Constants.h:504
@ GetAction
Definition: Constants.h:504
@ SetAction
Definition: Constants.h:504
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
void initParallel()
Must be called first when calling Eigen from multiple threads.
Definition: Parallelizer.h:53
int nbThreads()
Definition: Parallelizer.h:63
void setNbThreads(int v)
Sets the max number of threads reserved for Eigen.
Definition: Parallelizer.h:72
Definition: Eigen_Colamd.h:50
void swap(wpi::SmallVectorImpl< T > &LHS, wpi::SmallVectorImpl< T > &RHS)
Implement std::swap in terms of SmallVector swap.
Definition: SmallVector.h:1299
Definition: Parallelizer.h:80
GemmParallelInfo()
Definition: Parallelizer.h:81
Index lhs_length
Definition: Parallelizer.h:96
Index lhs_start
Definition: Parallelizer.h:95
Index volatile sync
Definition: Parallelizer.h:91
int volatile users
Definition: Parallelizer.h:92