DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements

MPIPatternP2P.h
// ---------------------------------------------------------------------
//
// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
// authors.
//
// This file is part of the DFT-FE code.
//
// The DFT-FE code is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE at
// the top level of the DFT-FE distribution.
//
// ---------------------------------------------------------------------
//

/*
 * @author Sambit Das, Bikash Kanungo
 */

#ifndef dftfeMPIPatternP2P_h
#define dftfeMPIPatternP2P_h

#include <MemorySpaceType.h>
#include <MemoryStorage.h>
#include <OptimizedIndexSet.h>
#include <vector>
#include <set>     // for std::set used by d_ghostIndicesSetSTL
#include <utility> // for std::pair
#include <mpi.h>
namespace dftfe
{
  namespace utils
  {
    namespace mpi
    {
      /** @brief A class template to store the communication pattern
       * (i.e., which entries/nodes to receive from which processor and
       * which entries/nodes to send to which processor).
       *
       * + <b>Assumptions</b>
       *   1. It assumes a sparse communication pattern. That is, a given
       *      processor communicates with only a few other processors.
       *      This object should be avoided if the communication pattern
       *      is dense (e.g., all-to-all communication).
       *   2. It assumes that each processor owns a set of \em contiguous
       *      integers (indices). Further, the ownership is exclusive
       *      (i.e., no index is owned by more than one processor). In
       *      other words, the sets of owned indices across all the
       *      processors are disjoint.
       *
       * @tparam memorySpace Defines the MemorySpace (i.e., HOST or
       * DEVICE) in which the various data members of this object must
       * reside.
       */
      template <dftfe::utils::MemorySpace memorySpace>
      class MPIPatternP2P
      {
        ///
        /// typedefs
        ///
      public:
        using SizeTypeVector = utils::MemoryStorage<size_type, memorySpace>;
        using GlobalSizeTypeVector =
          utils::MemoryStorage<global_size_type, memorySpace>;

      public:
        virtual ~MPIPatternP2P() = default;

        /**
         * @brief Constructor. This constructor is the typical way of
         * creating an MPI pattern.
         *
         * @param[in] locallyOwnedRange A pair of non-negative integers
         * \f$(a,b)\f$ which defines the range of contiguous indices
         * owned by the current processor.
         * @note The range is half-open: \f$a\f$ is included,
         * but \f$b\f$ is not.
         *
         * @param[in] ghostIndices An ordered set of non-negative indices
         * specifying the ghost indices for the current processor.
         * @note The vector must be ordered
         * (i.e., strictly increasing and hence non-repeating).
         *
         * @param[in] mpiComm The MPI communicator object which defines
         * the set of processors for which the MPI pattern needs to be
         * created.
         *
         * @throw Throws exception if \p mpiComm is in an invalid state,
         * if the \p locallyOwnedRange across all the processors are not
         * disjoint, if \p ghostIndices are not ordered (i.e., not
         * strictly increasing), or if some sanity checks with respect to
         * MPI sends and receives fail.
         *
         * @note Care is taken to create a dummy MPIPatternP2P when not
         * linking to an MPI library. This allows the user code to
         * seamlessly link and delink an MPI library.
         */
        MPIPatternP2P(const std::pair<global_size_type, global_size_type>
                        &locallyOwnedRange,
                      const std::vector<dftfe::global_size_type> &ghostIndices,
                      const MPI_Comm &mpiComm);
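
        /**
         * Example usage (a minimal sketch, not part of the DFT-FE
         * sources; the two-rank decomposition below is assumed purely
         * for illustration):
         *
         * @code
         * // Rank 0 owns global indices [0, 5) and ghosts index 7;
         * // rank 1 owns [5, 10) and ghosts index 2.
         * int rank;
         * MPI_Comm_rank(MPI_COMM_WORLD, &rank);
         * const std::pair<dftfe::global_size_type, dftfe::global_size_type>
         *   ownedRange =
         *     (rank == 0) ? std::make_pair(0, 5) : std::make_pair(5, 10);
         * const std::vector<dftfe::global_size_type> ghostIndices =
         *   (rank == 0) ? std::vector<dftfe::global_size_type>{7}
         *               : std::vector<dftfe::global_size_type>{2};
         * dftfe::utils::mpi::MPIPatternP2P<dftfe::utils::MemorySpace::HOST>
         *   pattern(ownedRange, ghostIndices, MPI_COMM_WORLD);
         * @endcode
         */
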
        /**
         * @brief Constructor. This constructor creates an MPI pattern
         * for a serial case. It is provided so that the same interface
         * can be used seamlessly even in a serial run. In this case, all
         * the indices are owned by the current processor.
         *
         * @param[in] size Total number of indices.
         * @note This is an explicitly serial construction (i.e., it uses
         * MPI_COMM_SELF), which is different from the dummy MPIPatternP2P
         * created when not linking to an MPI library. For example, within
         * a parallel run, one might need to create a serial
         * MPIPatternP2P. A typical case is the creation of a serial
         * vector as a special case of a distributed vector.
         * @note Similar to the previous constructor, care is taken to
         * create a dummy MPIPatternP2P when not linking to an MPI
         * library.
         */
        MPIPatternP2P(const size_type size);
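
        /**
         * Example usage (a minimal sketch): a serial pattern in which
         * the current processor owns all 100 indices.
         *
         * @code
         * dftfe::utils::mpi::MPIPatternP2P<dftfe::utils::MemorySpace::HOST>
         *   serialPattern(100);
         * @endcode
         */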

        // void
        // reinit(){};

        std::pair<global_size_type, global_size_type>
        getLocallyOwnedRange() const;

        size_type
        localOwnedSize() const;

        size_type
        localGhostSize() const;

        bool
        inLocallyOwnedRange(const global_size_type globalId) const;

        bool
        isGhostEntry(const global_size_type globalId) const;

        size_type
        globalToLocal(const global_size_type globalId) const;

        global_size_type
        localToGlobal(const size_type localId) const;

        const std::vector<global_size_type> &
        getGhostIndices() const;

        const std::vector<size_type> &
        getGhostProcIds() const;

        const std::vector<size_type> &
        getNumGhostIndicesInProcs() const;

        size_type
        getNumGhostIndicesInProc(const size_type procId) const;

        SizeTypeVector
        getGhostLocalIndices(const size_type procId) const;

        const std::vector<size_type> &
        getGhostLocalIndicesRanges() const;

        const std::vector<size_type> &
        getTargetProcIds() const;

        const std::vector<size_type> &
        getNumOwnedIndicesForTargetProcs() const;

        size_type
        getNumOwnedIndicesForTargetProc(const size_type procId) const;

        const SizeTypeVector &
        getOwnedLocalIndicesForTargetProcs() const;

        SizeTypeVector
        getOwnedLocalIndices(const size_type procId) const;

        size_type
        nmpiProcesses() const;

        size_type
        thisProcessId() const;

        global_size_type
        nGlobalIndices() const;

        const MPI_Comm &
        mpiCommunicator() const;

        bool
        isCompatible(const MPIPatternP2P<memorySpace> &rhs) const;

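        /**
         * Example usage of the accessors (a minimal sketch; `pattern` is
         * a previously constructed MPIPatternP2P, and the owned-then-ghost
         * local numbering mentioned in the comments is an assumption
         * based on the usual convention for such partitioners, not a
         * guarantee stated in this header):
         *
         * @code
         * const dftfe::size_type nOwned = pattern.localOwnedSize();
         * const dftfe::size_type nGhost = pattern.localGhostSize();
         * for (const auto g : pattern.getGhostIndices())
         *   {
         *     // assumed: ghost ids map to [nOwned, nOwned + nGhost)
         *     const dftfe::size_type        l     = pattern.globalToLocal(g);
         *     const dftfe::global_size_type gBack = pattern.localToGlobal(l);
         *     // gBack == g for any valid local id
         *   }
         * @endcode
         */
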
      private:
        /**
         * A pair \f$(a,b)\f$ which defines the range of contiguous
         * indices owned by the current processor.
         *
         * @note The range is half-open: \f$a\f$ is included,
         * but \f$b\f$ is not.
         */
        std::pair<global_size_type, global_size_type> d_locallyOwnedRange;

        /**
         * A vector of size 2 times the number of processors to store the
         * locallyOwnedRange of each processor. That is, it stores the
         * list \f$[a_0, b_0, a_1, b_1, \ldots, a_{P-1}, b_{P-1}]\f$,
         * where the pair \f$(a_i, b_i)\f$ defines the range of contiguous
         * indices owned by the \f$i\f$-th processor.
         *
         * @note Each range is half-open: \f$a_i\f$ is included but
         * \f$b_i\f$ is not.
         */
        std::vector<global_size_type> d_allOwnedRanges;
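
        /*
         * Illustration with assumed numbers: with three processors owning
         * the ranges [0, 4), [4, 9), and [9, 12), d_allOwnedRanges holds
         * {0, 4, 4, 9, 9, 12}.
         */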

        /**
         * Number of locally owned indices in the current processor.
         */
        size_type d_numLocallyOwnedIndices;

        /**
         * Number of ghost indices in the current processor.
         */
        size_type d_numGhostIndices;

        /**
         * Vector to store an ordered set of ghost indices
         * (ordered in increasing order and non-repeating).
         */
        std::vector<global_size_type> d_ghostIndices;

        /**
         * A copy of the above d_ghostIndices stored as an STL set.
         */
        std::set<global_size_type> d_ghostIndicesSetSTL;

        /**
         * An OptimizedIndexSet object to store the ghost indices for
         * efficient operations. The OptimizedIndexSet internally creates
         * contiguous sub-ranges within the set of indices and hence can
         * speed up the lookup of an index.
         */
        OptimizedIndexSet<global_size_type> d_ghostIndicesOptimizedIndexSet;

        /**
         * Number of ghost processors for the current processor. A ghost
         * processor is one which owns at least one of the ghost indices
         * of this processor.
         */
        size_type d_numGhostProcs;

        /**
         * Vector to store the ghost processor Ids. A ghost processor is
         * one which owns at least one of the ghost indices of this
         * processor.
         */
        std::vector<size_type> d_ghostProcIds;

        /**
         * Vector of size number of ghost processors to store how many
         * ghost indices of the current processor are owned by each ghost
         * processor.
         */
        std::vector<size_type> d_numGhostIndicesInGhostProcs;

        /**
         * A flattened vector of size number of ghosts containing the
         * ghost indices ordered as per the list of ghost processor Ids
         * in d_ghostProcIds. In other words, it stores a concatenation
         * of the lists
         * \f$L_i = \{g^{(k_i)}_1, g^{(k_i)}_2, \ldots, g^{(k_i)}_{N_i}\}\f$,
         * where the \f$g\f$'s are the ghost indices, \f$k_i\f$ is the
         * rank of the \f$i\f$-th ghost processor (i.e.,
         * d_ghostProcIds[i]), and \f$N_i\f$ is the number of ghost
         * indices owned by the \f$i\f$-th ghost processor (i.e.,
         * d_numGhostIndicesInGhostProcs[i]).
         *
         * @note Each \f$L_i\f$ has to be an increasing set.
         *
         * @note We store only the ghost index local to this processor,
         * i.e., the position of the ghost index in d_ghostIndicesSetSTL
         * or d_ghostIndices. This is done so as to use size_type
         * (unsigned int) instead of global_size_type (unsigned long
         * int), which reduces the volume of data transferred during MPI
         * calls.
         *
         * @note In case the locally owned ranges across all the
         * processors are ordered as per the processor Id, this vector is
         * redundant and one can work with d_ghostIndices and
         * d_numGhostIndicesInGhostProcs alone. The locally owned ranges
         * being ordered as per the processor Id means that the ranges
         * for processors \f$0, 1, \ldots, P-1\f$ are
         * \f$[N_0, N_1), [N_1, N_2), \ldots, [N_{P-1}, N_P)\f$ with
         * \f$N_0, N_1, \ldots, N_P\f$ being non-decreasing. In the more
         * general case, where the locally owned ranges are not ordered
         * as per the processor Id, this vector is needed.
         */
        SizeTypeVector d_flattenedLocalGhostIndices;

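        /*
         * Illustration with assumed numbers: if this processor's ghost
         * indices are {2, 7, 8}, with global index 2 owned by rank 1 and
         * global indices {7, 8} owned by rank 3, then
         * d_ghostProcIds = {1, 3},
         * d_numGhostIndicesInGhostProcs = {1, 2}, and
         * d_flattenedLocalGhostIndices = {0, 1, 2}, i.e., the positions
         * of 2, 7, and 8 within d_ghostIndices.
         */
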
        /**
         * @brief A vector of size 2 times the number of ghost processors
         * to store the range of local ghost indices that are owned by
         * the ghost processors. In other words, it stores the list
         * \f$L = \{a_1, b_1, a_2, b_2, \ldots, a_G, b_G\}\f$, where
         * \f$a_i\f$ and \f$b_i\f$ are the start local ghost index and
         * the one-past-the-last local ghost index of the current
         * processor that are owned by the \f$i\f$-th ghost processor
         * (i.e., d_ghostProcIds[i]). Put differently, \f$[a_i, b_i)\f$
         * forms a half-open interval, where \f$a_i\f$ is included but
         * \f$b_i\f$ is not.
         *
         * @note Given that the locally owned indices of each processor
         * are contiguous and the global ghost indices (i.e.,
         * d_ghostIndices) are ordered, it is sufficient to store just
         * the range of local ghost indices for each ghost procId. The
         * actual global ghost indices belonging to the \f$i\f$-th ghost
         * processor can be fetched from d_ghostIndices (i.e., the subset
         * of d_ghostIndices at positions \f$[a_i, b_i)\f$).
         */
        std::vector<size_type> d_localGhostIndicesRanges;
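
        /*
         * Illustration (continuing the assumed numbers above):
         * d_localGhostIndicesRanges = {0, 1, 1, 3}, i.e., local ghost
         * indices [0, 1) are owned by rank 1 and local ghost indices
         * [1, 3) are owned by rank 3.
         */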

        /**
         * Number of target processors for the current processor. A
         * target processor is one which owns at least one of the locally
         * owned indices of this processor as its ghost index.
         */
        size_type d_numTargetProcs;

        /**
         * Vector to store the target processor Ids. A target processor
         * is one which contains at least one of the locally owned
         * indices of this processor as its ghost index.
         */
        std::vector<size_type> d_targetProcIds;

        /**
         * Vector of size number of target processors to store how many
         * locally owned indices of the current processor are needed (as
         * ghost indices) by each of the target processors.
         */
        std::vector<size_type> d_numOwnedIndicesForTargetProcs;

        /**
         * Vector of size \f$\sum_i\f$ d_numOwnedIndicesForTargetProcs[i]
         * to store all the locally owned indices which other processors
         * need (i.e., which are ghost indices in other processors). It
         * is stored as a concatenation of lists, where the \f$i\f$-th
         * list is
         * \f$L_i = \{o^{(k_i)}_1, o^{(k_i)}_2, \ldots, o^{(k_i)}_{N_i}\}\f$.
         * Here the \f$o\f$'s are the owned indices needed by a target
         * processor, \f$k_i\f$ is the rank of the \f$i\f$-th target
         * processor (i.e., d_targetProcIds[i]), and \f$N_i\f$ is the
         * number of indices to be sent to the \f$i\f$-th target
         * processor (i.e., d_numOwnedIndicesForTargetProcs[i]).
         *
         * @note We store only the indices local to this processor, i.e.,
         * the relative position of the index within the locally owned
         * range of this processor. This is done so as to use size_type
         * (unsigned int) instead of global_size_type (unsigned long
         * int), which reduces the volume of data transferred during MPI
         * calls.
         *
         * @note Each list \f$L_i\f$ must be ordered.
         */
        SizeTypeVector d_flattenedLocalTargetIndices;

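        /*
         * Illustration with assumed numbers: if the locally owned range
         * is [4, 9) and rank 2 needs the owned global indices {5, 6} as
         * its ghosts, then d_targetProcIds = {2},
         * d_numOwnedIndicesForTargetProcs = {2}, and
         * d_flattenedLocalTargetIndices = {1, 2}, i.e., the positions of
         * 5 and 6 relative to the start of the owned range.
         */
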
        /// Number of processors in the MPI Communicator.
        int d_nprocs;

        /// Rank of the current processor.
        int d_myRank;

        /**
         * Total number of unique indices across all processors.
         */
        global_size_type d_nGlobalIndices;

        /// MPI Communicator object.
        MPI_Comm d_mpiComm;
      };

    } // end of namespace mpi
  }   // end of namespace utils
} // end of namespace dftfe

#include "MPIPatternP2P.t.cc"
#endif // dftfeMPIPatternP2P_h