Line | Branch | Exec | Source |
---|---|---|---|
1 | // SPDX-FileCopyrightText: 2022-2023 Dennis Gläser <dennis.glaeser@iws.uni-stuttgart.de> | ||
2 | // SPDX-License-Identifier: MIT | ||
3 | /*! | ||
4 | * \file | ||
5 | * \ingroup Parallel | ||
6 | * \brief Traits for parallel communication. | ||
7 | */ | ||
8 | #ifndef GRIDFORMAT_PARALLEL_TRAITS_HPP_ | ||
9 | #define GRIDFORMAT_PARALLEL_TRAITS_HPP_ | ||
10 | |||
11 | #include <array> | ||
12 | #include <ranges> | ||
13 | #include <vector> | ||
14 | #include <type_traits> | ||
15 | #include <algorithm> | ||
16 | |||
17 | #include <gridformat/common/ranges.hpp> | ||
18 | #include <gridformat/common/concepts.hpp> | ||
19 | #include <gridformat/common/exceptions.hpp> | ||
20 | #include <gridformat/common/type_traits.hpp> | ||
21 | |||
22 | namespace GridFormat::ParallelTraits { | ||
23 | |||
24 | //! \addtogroup Parallel | ||
25 | //! \{ | ||
26 | |||
27 | //! Metafunction to obtain the number of processes from a communicator via a static `int get(const Communicator&)` | ||
28 | template<typename Communicator> | ||
29 | struct Size; | ||
30 | |||
31 | //! Metafunction to obtain the rank of a process from a communicator via a static `int get(const Communicator&)` | ||
32 | template<typename Communicator> | ||
33 | struct Rank; | ||
34 | |||
35 | //! Metafunction to obtain a barrier for all processes to reach before continuation via a static function `int get(const Communicator&)` | ||
36 | template<typename Communicator> | ||
37 | struct Barrier; | ||
38 | |||
39 | //! Metafunction to compute the maximum for a value over all processes via a static function `T get(const Communicator&, const T& values, int root_rank = 0)` | ||
40 | template<typename Communicator> | ||
41 | struct Max; | ||
42 | |||
43 | //! Metafunction to compute the minimum for a value over all processes via a static function `T get(const Communicator&, const T& values, int root_rank = 0)` | ||
44 | template<typename Communicator> | ||
45 | struct Min; | ||
46 | |||
47 | //! Metafunction to compute the sum over values on all processes via a static function `T get(const Communicator&, const T& values, int root_rank = 0)` | ||
48 | template<typename Communicator> | ||
49 | struct Sum; | ||
50 | |||
51 | //! Metafunction to broadcast values from the root to all other processes via a static function `T get(const Communicator&, const T& values, int root_rank = 0)` | ||
52 | template<typename Communicator> | ||
53 | struct BroadCast; | ||
54 | |||
55 | //! Metafunction to gather values from all processes via a static function `std::vector<T> get(const Communicator&, const T& values, int root_rank = 0)` | ||
56 | //! Only the root process will receive the result | ||
57 | template<typename Communicator> | ||
58 | struct Gather; | ||
59 | |||
60 | //! Metafunction to scatter values to all processes via a static function `std::vector<T> get(const Communicator&, const T& values, int root_rank = 0)` | ||
61 | //! Every process (including the root) receives its chunk of the values provided by the root process | ||
62 | template<typename Communicator> | ||
63 | struct Scatter; | ||
64 | |||
65 | //! \} group Parallel | ||
66 | |||
67 | } // namespace GridFormat::ParallelTraits | ||
68 | |||
69 | namespace GridFormat { | ||
70 | |||
//! Empty tag type selecting the non-MPI trait specializations below,
//! which report a single process (size 1, rank 0) and pass data through unchanged.
struct NullCommunicator {};
72 | |||
73 | namespace ParallelTraits { | ||
74 | |||
75 | template<> | ||
76 | struct Size<NullCommunicator> { | ||
77 | 3254 | static constexpr int get(const NullCommunicator&) { return 1; } | |
78 | }; | ||
79 | |||
80 | template<> | ||
81 | struct Rank<NullCommunicator> { | ||
82 | 200 | static constexpr int get(const NullCommunicator&) { return 0; } | |
83 | }; | ||
84 | |||
85 | template<> | ||
86 | struct Barrier<NullCommunicator> { | ||
87 | 130 | static constexpr int get(const NullCommunicator&) { return 0; } | |
88 | }; | ||
89 | |||
90 | template<> | ||
91 | struct Max<NullCommunicator> { | ||
92 | template<typename T> | ||
93 | 1 | static constexpr const T& get(const NullCommunicator&, const T& data, [[maybe_unused]] int root_rank = 0) { | |
94 | 1 | return data; | |
95 | } | ||
96 | }; | ||
97 | |||
98 | template<> | ||
99 | struct Min<NullCommunicator> { | ||
100 | template<typename T> | ||
101 | 1 | static constexpr const T& get(const NullCommunicator&, const T& data, [[maybe_unused]] int root_rank = 0) { | |
102 | 1 | return data; | |
103 | } | ||
104 | }; | ||
105 | |||
106 | template<> | ||
107 | struct Sum<NullCommunicator> { | ||
108 | template<typename T> | ||
109 | 1 | static constexpr const T& get(const NullCommunicator&, const T& data, [[maybe_unused]] int root_rank = 0) { | |
110 | 1 | return data; | |
111 | } | ||
112 | }; | ||
113 | |||
114 | template<> | ||
115 | struct BroadCast<NullCommunicator> { | ||
116 | template<typename T> | ||
117 | 1 | static constexpr const T& get(const NullCommunicator&, const T& data, [[maybe_unused]] int root_rank = 0) { | |
118 | 1 | return data; | |
119 | } | ||
120 | }; | ||
121 | |||
122 | template<> | ||
123 | struct Gather<NullCommunicator> { | ||
124 | template<Concepts::Scalar T> | ||
125 | 1 | static constexpr std::vector<T> get(const NullCommunicator&, const T& value, [[maybe_unused]] int root_rank = 0) { | |
126 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
3 | return {value}; |
127 | } | ||
128 | |||
129 | template<Concepts::Scalar T> | ||
130 | 1 | static constexpr const std::vector<T>& get(const NullCommunicator&, const std::vector<T>& vec, [[maybe_unused]] int root_rank = 0) { | |
131 | 1 | return vec; | |
132 | } | ||
133 | |||
134 | template<std::ranges::range R> | ||
135 | 6 | static constexpr auto get(const NullCommunicator&, const R& r, [[maybe_unused]] int root_rank = 0) { | |
136 |
1/2✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
|
6 | std::vector<std::ranges::range_value_t<R>> result(Ranges::size(r)); |
137 |
1/2✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
|
6 | std::ranges::copy(r, result.begin()); |
138 | 6 | return result; | |
139 | ✗ | } | |
140 | }; | ||
141 | |||
142 | template<> | ||
143 | struct Scatter<NullCommunicator> { | ||
144 | public: | ||
145 | template<typename T> | ||
146 | 1 | static constexpr const auto& get(const NullCommunicator&, const std::vector<T>& vec, [[maybe_unused]] int root_rank = 0) { | |
147 | 1 | return vec; | |
148 | } | ||
149 | |||
150 | template<std::ranges::contiguous_range R> | ||
151 | requires(!Concepts::StaticallySizedRange<R>) | ||
152 | static constexpr auto get(const NullCommunicator&, const R& r, [[maybe_unused]] int root_rank = 0) { | ||
153 | std::vector<std::ranges::range_value_t<R>> result; | ||
154 | result.resize(Ranges::size(r)); | ||
155 | std::ranges::copy(r, result.begin()); | ||
156 | return result; | ||
157 | } | ||
158 | |||
159 | template<std::ranges::contiguous_range R> | ||
160 | requires(Concepts::StaticallySizedRange<R>) | ||
161 | static constexpr auto get(const NullCommunicator&, const R& r, [[maybe_unused]] int root_rank = 0) { | ||
162 | std::array<std::ranges::range_value_t<R>, static_size<R>> result; | ||
163 | std::ranges::copy(r, result.begin()); | ||
164 | return result; | ||
165 | } | ||
166 | }; | ||
167 | |||
168 | } // namespace ParallelTraits | ||
169 | |||
170 | } // namespace GridFormat | ||
171 | |||
172 | #if GRIDFORMAT_HAVE_MPI | ||
173 | |||
174 | #include <mpi.h> | ||
175 | |||
176 | #include <gridformat/common/exceptions.hpp> | ||
177 | |||
178 | namespace GridFormat::ParallelTraits { | ||
179 | |||
180 | #ifndef DOXYGEN | ||
181 | namespace MPIDetail { | ||
182 | |||
183 | template<typename T> | ||
184 | 115236 | decltype(auto) get_data_type() { | |
185 | if constexpr (std::is_same_v<T, char>) | ||
186 | 128 | return MPI_CHAR; | |
187 | else if constexpr (std::is_same_v<T, signed short int>) | ||
188 | return MPI_SHORT; | ||
189 | else if constexpr (std::is_same_v<T, signed int>) | ||
190 | return MPI_INT; | ||
191 | else if constexpr (std::is_same_v<T, signed long int>) | ||
192 | return MPI_LONG; | ||
193 | else if constexpr (std::is_same_v<T, unsigned char>) | ||
194 | return MPI_UNSIGNED_CHAR; | ||
195 | else if constexpr (std::is_same_v<T, unsigned short int>) | ||
196 | return MPI_UNSIGNED_SHORT; | ||
197 | else if constexpr (std::is_same_v<T, unsigned int>) | ||
198 | return MPI_UNSIGNED; | ||
199 | else if constexpr (std::is_same_v<T, unsigned long int>) | ||
200 | 85136 | return MPI_UNSIGNED_LONG; | |
201 | else if constexpr (std::is_same_v<T, float>) | ||
202 | return MPI_FLOAT; | ||
203 | else if constexpr (std::is_same_v<T, double>) | ||
204 | 29972 | return MPI_DOUBLE; | |
205 | else if constexpr (std::is_same_v<T, long double>) | ||
206 | return MPI_LONG_DOUBLE; | ||
207 | else | ||
208 | throw TypeError("Cannot deduce mpi type from given type"); | ||
209 | } | ||
210 | |||
211 | template<typename T> | ||
212 | 688 | void reduce(const T* in, T* out, MPI_Comm comm, MPI_Op operation, int num_values, int root_rank) { | |
213 | 688 | MPI_Reduce(in, out, num_values, MPIDetail::get_data_type<T>(), operation, root_rank, comm); | |
214 | 688 | } | |
215 | |||
216 | } // namespace MPIDetail | ||
217 | #endif // DOXYGEN | ||
218 | |||
219 | template<> | ||
220 | struct Size<MPI_Comm> { | ||
221 | 68848 | static int get(MPI_Comm comm) { | |
222 | int s; | ||
223 |
1/2✓ Branch 1 taken 68848 times.
✗ Branch 2 not taken.
|
68848 | MPI_Comm_size(comm, &s); |
224 | 68848 | return s; | |
225 | } | ||
226 | }; | ||
227 | |||
228 | template<> | ||
229 | struct Rank<MPI_Comm> { | ||
230 | 37069 | static int get(MPI_Comm comm) { | |
231 | int r; | ||
232 |
1/2✓ Branch 1 taken 37069 times.
✗ Branch 2 not taken.
|
37069 | MPI_Comm_rank(comm, &r); |
233 | 37069 | return r; | |
234 | } | ||
235 | }; | ||
236 | |||
237 | template<> | ||
238 | struct Barrier<MPI_Comm> { | ||
239 | 12550 | static int get(MPI_Comm comm) { | |
240 | 12550 | return MPI_Barrier(comm); | |
241 | } | ||
242 | }; | ||
243 | |||
244 | template<> | ||
245 | struct Max<MPI_Comm> { | ||
246 | template<Concepts::Scalar T> | ||
247 | static T get(MPI_Comm comm, const T& value, int root_rank = 0) { | ||
248 | static constexpr int num_values = 1; | ||
249 | T result; | ||
250 | MPIDetail::reduce(&value, &result, comm, MPI_MAX, num_values, root_rank); | ||
251 | return result; | ||
252 | } | ||
253 | |||
254 | template<Concepts::StaticallySizedMDRange<1> R> requires(std::ranges::contiguous_range<R>) | ||
255 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | ||
256 | static constexpr int num_values = static_size<R>; | ||
257 | std::array<std::ranges::range_value_t<R>, num_values> result; | ||
258 | MPIDetail::reduce(std::ranges::cdata(values), result.data(), comm, MPI_MAX, num_values, root_rank); | ||
259 | return result; | ||
260 | } | ||
261 | }; | ||
262 | |||
263 | template<> | ||
264 | struct Min<MPI_Comm> { | ||
265 | template<Concepts::Scalar T> | ||
266 | static T get(MPI_Comm comm, const T& value, int root_rank = 0) { | ||
267 | static constexpr int num_values = 1; | ||
268 | T result; | ||
269 | MPIDetail::reduce(&value, &result, comm, MPI_MIN, num_values, root_rank); | ||
270 | return result; | ||
271 | } | ||
272 | |||
273 | template<Concepts::StaticallySizedMDRange<1> R> requires(std::ranges::contiguous_range<R>) | ||
274 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | ||
275 | static constexpr int num_values = static_size<R>; | ||
276 | std::array<std::ranges::range_value_t<R>, num_values> result; | ||
277 | MPIDetail::reduce(std::ranges::cdata(values), result.data(), comm, MPI_MIN, num_values, root_rank); | ||
278 | return result; | ||
279 | } | ||
280 | }; | ||
281 | |||
282 | template<> | ||
283 | struct Sum<MPI_Comm> { | ||
284 | template<Concepts::Scalar T> | ||
285 | 688 | static T get(MPI_Comm comm, const T& value, int root_rank = 0) { | |
286 | static constexpr int num_values = 1; | ||
287 | T result; | ||
288 |
1/2✓ Branch 1 taken 688 times.
✗ Branch 2 not taken.
|
688 | MPIDetail::reduce(&value, &result, comm, MPI_SUM, num_values, root_rank); |
289 | 688 | return result; | |
290 | } | ||
291 | |||
292 | template<Concepts::StaticallySizedMDRange<1> R> requires(std::ranges::contiguous_range<R>) | ||
293 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | ||
294 | static constexpr int num_values = static_size<R>; | ||
295 | std::array<std::ranges::range_value_t<R>, num_values> result; | ||
296 | MPIDetail::reduce(std::ranges::cdata(values), result.data(), comm, MPI_SUM, num_values, root_rank); | ||
297 | return result; | ||
298 | } | ||
299 | }; | ||
300 | |||
301 | template<> | ||
302 | struct BroadCast<MPI_Comm> { | ||
303 | template<Concepts::Scalar T> | ||
304 | 8064 | static T get(MPI_Comm comm, const T& value, int root_rank = 0) { | |
305 | static constexpr int num_values = 1; | ||
306 | 8064 | T result = value; | |
307 |
1/2✓ Branch 2 taken 8064 times.
✗ Branch 3 not taken.
|
8064 | MPI_Bcast( |
308 | &result, | ||
309 | num_values, | ||
310 | MPIDetail::get_data_type<T>(), | ||
311 | root_rank, | ||
312 | comm | ||
313 | ); | ||
314 | 8064 | return result; | |
315 | } | ||
316 | |||
317 | template<std::ranges::contiguous_range R> requires(!Concepts::StaticallySizedRange<R>) | ||
318 | 292 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | |
319 | using T = std::ranges::range_value_t<R>; | ||
320 |
1/2✓ Branch 2 taken 292 times.
✗ Branch 3 not taken.
|
292 | const auto num_values = BroadCast<MPI_Comm>::get(comm, std::ranges::size(values), root_rank); |
321 |
1/2✓ Branch 1 taken 292 times.
✗ Branch 2 not taken.
|
292 | std::vector<T> result(num_values); |
322 |
1/2✓ Branch 2 taken 292 times.
✗ Branch 3 not taken.
|
292 | std::ranges::copy(values, result.begin()); |
323 |
1/2✓ Branch 2 taken 292 times.
✗ Branch 3 not taken.
|
584 | MPI_Bcast( |
324 | 292 | result.data(), | |
325 | num_values, | ||
326 | MPIDetail::get_data_type<T>(), | ||
327 | root_rank, | ||
328 | comm | ||
329 | ); | ||
330 | 292 | return result; | |
331 | ✗ | } | |
332 | |||
333 | template<std::ranges::contiguous_range R> requires(Concepts::StaticallySizedRange<R>) | ||
334 | 17196 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | |
335 | using T = std::ranges::range_value_t<R>; | ||
336 | static constexpr auto num_values = static_size<R>; | ||
337 | std::array<T, num_values> result; | ||
338 |
1/2✓ Branch 1 taken 1954 times.
✗ Branch 2 not taken.
|
17196 | std::ranges::copy(values, result.begin()); |
339 | 17196 | MPI_Bcast( | |
340 |
1/2✓ Branch 1 taken 1954 times.
✗ Branch 2 not taken.
|
17196 | result.data(), |
341 | num_values, | ||
342 | MPIDetail::get_data_type<T>(), | ||
343 | root_rank, | ||
344 | comm | ||
345 | ); | ||
346 | 17196 | return result; | |
347 | } | ||
348 | }; | ||
349 | |||
350 | template<> | ||
351 | struct Gather<MPI_Comm> { | ||
352 | template<Concepts::Scalar T> | ||
353 | 912 | static auto get(MPI_Comm comm, const T& value, int root_rank = 0) { | |
354 | static constexpr int num_values = 1; | ||
355 | 912 | const int this_rank = Rank<MPI_Comm>::get(comm); | |
356 | |||
357 | 912 | std::vector<T> result; | |
358 |
2/4✓ Branch 1 taken 912 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 912 times.
✗ Branch 5 not taken.
|
912 | result.resize(Size<MPI_Comm>::get(comm), T{0}); |
359 | |||
360 |
3/4✓ Branch 1 taken 440 times.
✓ Branch 2 taken 472 times.
✓ Branch 5 taken 912 times.
✗ Branch 6 not taken.
|
1352 | MPI_Gather( |
361 | &value, | ||
362 | num_values, | ||
363 | MPIDetail::get_data_type<T>(), | ||
364 | 440 | (this_rank == root_rank ? result.data() : NULL), | |
365 | num_values, | ||
366 | MPIDetail::get_data_type<T>(), | ||
367 | root_rank, | ||
368 | comm | ||
369 | ); | ||
370 | 912 | return result; | |
371 | ✗ | } | |
372 | |||
373 | template<Concepts::StaticallySizedMDRange<1> R> requires(std::ranges::contiguous_range<R>) | ||
374 | 25520 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | |
375 | using T = std::ranges::range_value_t<R>; | ||
376 | static constexpr int num_values = static_size<R>; | ||
377 | |||
378 | 25520 | const int this_rank = Rank<MPI_Comm>::get(comm); | |
379 |
2/4✓ Branch 1 taken 12760 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12760 times.
✗ Branch 5 not taken.
|
25520 | std::vector<T> result(Size<MPI_Comm>::get(comm)*num_values, T{0}); |
380 |
3/4✓ Branch 1 taken 4304 times.
✓ Branch 2 taken 8456 times.
✓ Branch 5 taken 12760 times.
✗ Branch 6 not taken.
|
59648 | MPI_Gather( |
381 | 25520 | std::ranges::cdata(values), | |
382 | num_values, | ||
383 | MPIDetail::get_data_type<T>(), | ||
384 | 8608 | (this_rank == root_rank ? result.data() : NULL), | |
385 | num_values, | ||
386 | MPIDetail::get_data_type<T>(), | ||
387 | root_rank, | ||
388 | comm | ||
389 | ); | ||
390 | 25520 | return result; | |
391 | ✗ | } | |
392 | }; | ||
393 | |||
394 | template<> | ||
395 | struct Scatter<MPI_Comm> { | ||
396 | template<std::ranges::contiguous_range R> requires( | ||
397 | !Concepts::StaticallySizedRange<R> and | ||
398 | std::ranges::sized_range<R>) | ||
399 | 7064 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | |
400 | using T = std::ranges::range_value_t<R>; | ||
401 |
1/2✓ Branch 2 taken 7064 times.
✗ Branch 3 not taken.
|
7064 | const int num_values = static_cast<int>(BroadCast<MPI_Comm>::get(comm, std::ranges::size(values), root_rank)); |
402 | 7064 | const int size = Size<MPI_Comm>::get(comm); | |
403 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7064 times.
|
7064 | if (num_values%size != 0) |
404 | ✗ | throw SizeError("Cannot scatter data with unequal chunks per process"); | |
405 | |||
406 |
1/2✓ Branch 1 taken 7064 times.
✗ Branch 2 not taken.
|
7064 | std::vector<T> result(num_values/size); |
407 |
1/2✓ Branch 3 taken 7064 times.
✗ Branch 4 not taken.
|
21192 | MPI_Scatter( |
408 | 7064 | std::ranges::cdata(values), | |
409 | num_values/size, | ||
410 | MPIDetail::get_data_type<T>(), | ||
411 | 7064 | result.data(), | |
412 | num_values/size, | ||
413 | MPIDetail::get_data_type<T>(), | ||
414 | root_rank, | ||
415 | comm | ||
416 | ); | ||
417 | 7064 | return result; | |
418 | ✗ | } | |
419 | |||
420 | template<std::ranges::contiguous_range R> requires(Concepts::StaticallySizedRange<R>) | ||
421 | static auto get(MPI_Comm comm, const R& values, int root_rank = 0) { | ||
422 | using T = std::ranges::range_value_t<R>; | ||
423 | const int num_values = static_cast<int>(std::ranges::size(values)); | ||
424 | const int size = Size<MPI_Comm>::get(comm); | ||
425 | |||
426 | if (num_values%size != 0) | ||
427 | throw SizeError("Cannot scatter data with unequal chunks per process"); | ||
428 | |||
429 | std::array<T, static_size<R>> result(num_values/size); | ||
430 | MPI_Scatter( | ||
431 | std::ranges::cdata(values), | ||
432 | num_values/size, | ||
433 | MPIDetail::get_data_type<T>(), | ||
434 | result.data(), | ||
435 | num_values/size, | ||
436 | MPIDetail::get_data_type<T>(), | ||
437 | root_rank, | ||
438 | comm | ||
439 | ); | ||
440 | return result; | ||
441 | } | ||
442 | }; | ||
443 | |||
444 | } // namespace GridFormat::ParallelTraits | ||
445 | |||
446 | #endif // GRIDFORMAT_HAVE_MPI | ||
447 | |||
448 | #endif // GRIDFORMAT_PARALLEL_TRAITS_HPP_ | ||
449 |