DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements

MPIPatternP2P.h
// ---------------------------------------------------------------------
//
// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
// authors.
//
// This file is part of the DFT-FE code.
//
// The DFT-FE code is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE at
// the top level of the DFT-FE distribution.
//
// ---------------------------------------------------------------------
//

/*
 * @author Sambit Das, Bikash Kanungo
 */

#ifndef dftfeMPIPatternP2P_h
#define dftfeMPIPatternP2P_h

#include <MemorySpaceType.h>
#include <MemoryStorage.h>
#include <OptimizedIndexSet.h>
#include <vector>
#include <set>     // for std::set used by d_ghostIndicesSetSTL
#include <utility> // for std::pair
#include <mpi.h>
namespace dftfe
{
  namespace utils
  {
    namespace mpi
    {
      /** @brief A class template to store the communication pattern
       * (i.e., which entries/nodes to receive from which processor and
       * which entries/nodes to send to which processor).
       *
       * + <b>Assumptions</b>
       *   1. It assumes a sparse communication pattern. That is, a given
       *      processor communicates with only a few other processors.
       *      This object should be avoided if the communication pattern
       *      is dense (e.g., all-to-all communication).
       *   2. It assumes that each processor owns a set of \em contiguous
       *      integers (indices). Further, the ownership is exclusive
       *      (i.e., no index is owned by more than one processor). In
       *      other words, the sets of owned indices across all the
       *      processors are disjoint.
       *
       * @tparam memorySpace Defines the MemorySpace (i.e., HOST or
       * DEVICE) in which the various data members of this object must
       * reside.
       */
      template <dftfe::utils::MemorySpace memorySpace>
      class MPIPatternP2P
      {
        ///
        /// typedefs
        ///
      public:
        using SizeTypeVector = utils::MemoryStorage<size_type, memorySpace>;
        using GlobalSizeTypeVector =
          utils::MemoryStorage<global_size_type, memorySpace>;

      public:
        virtual ~MPIPatternP2P() = default;

        /**
         * @brief Constructor. This constructor is the typical way of
         * creating an MPI pattern.
         *
         * @param[in] locallyOwnedRange A pair of non-negative integers
         * \f$(a,b)\f$ which defines the range of contiguous indices
         * owned by the current processor.
         * @note The range is half-open: \f$a\f$ is included,
         * but \f$b\f$ is not.
         *
         * @param[in] ghostIndices An ordered set of non-negative indices
         * specifying the ghost indices for the current processor.
         * @note The vector must be ordered
         * (i.e., strictly increasing and hence non-repeating).
         *
         * @param[in] mpiComm The MPI communicator object which defines
         * the set of processors for which the MPI pattern needs to be
         * created.
         *
         * @throw Throws exception if \p mpiComm is in an invalid state,
         * if the \p locallyOwnedRange across all the processors are not
         * disjoint, if \p ghostIndices are not ordered (i.e., not
         * strictly increasing), or if some sanity checks with respect to
         * MPI sends and receives fail.
         *
         * @note Care is taken to create a dummy MPIPatternP2P when not
         * linking to an MPI library. This allows the user code to
         * seamlessly link and delink an MPI library.
         */
        MPIPatternP2P(const std::pair<global_size_type, global_size_type>
                        &locallyOwnedRange,
                      const std::vector<dftfe::global_size_type> &ghostIndices,
                      const MPI_Comm &mpiComm);
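
        /**
         * Example usage (a minimal sketch, not part of the DFT-FE
         * sources; the two-rank decomposition below is assumed purely
         * for illustration):
         *
         * @code
         * // Rank 0 owns global indices [0, 5) and ghosts index 7;
         * // rank 1 owns [5, 10) and ghosts index 2.
         * int rank;
         * MPI_Comm_rank(MPI_COMM_WORLD, &rank);
         * const std::pair<dftfe::global_size_type, dftfe::global_size_type>
         *   ownedRange =
         *     (rank == 0) ? std::make_pair(0, 5) : std::make_pair(5, 10);
         * const std::vector<dftfe::global_size_type> ghostIndices =
         *   (rank == 0) ? std::vector<dftfe::global_size_type>{7}
         *               : std::vector<dftfe::global_size_type>{2};
         * dftfe::utils::mpi::MPIPatternP2P<dftfe::utils::MemorySpace::HOST>
         *   pattern(ownedRange, ghostIndices, MPI_COMM_WORLD);
         * @endcode
         */
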
        /**
         * @brief Constructor. This constructor creates an MPI pattern
         * for a serial case. It is provided so that the same interface
         * can be used seamlessly even in a serial run. In this case, all
         * the indices are owned by the current processor.
         *
         * @param[in] size Total number of indices.
         * @note This is an explicitly serial construction (i.e., it uses
         * MPI_COMM_SELF), which is different from the dummy MPIPatternP2P
         * created when not linking to an MPI library. For example, within
         * a parallel run, one might need to create a serial
         * MPIPatternP2P. A typical case is the creation of a serial
         * vector as a special case of a distributed vector.
         * @note Similar to the previous constructor, care is taken to
         * create a dummy MPIPatternP2P when not linking to an MPI
         * library.
         */
        MPIPatternP2P(const size_type size);
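
        /**
         * Example usage (a minimal sketch): a serial pattern in which
         * the current processor owns all 100 indices.
         *
         * @code
         * dftfe::utils::mpi::MPIPatternP2P<dftfe::utils::MemorySpace::HOST>
         *   serialPattern(100);
         * @endcode
         */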

        // void
        // reinit(){};

        std::pair<global_size_type, global_size_type>
        getLocallyOwnedRange() const;

        size_type
        localOwnedSize() const;

        size_type
        localGhostSize() const;

        bool
        inLocallyOwnedRange(const global_size_type globalId) const;

        bool
        isGhostEntry(const global_size_type globalId) const;

        size_type
        globalToLocal(const global_size_type globalId) const;

        global_size_type
        localToGlobal(const size_type localId) const;

        const std::vector<global_size_type> &
        getGhostIndices() const;

        const std::vector<size_type> &
        getGhostProcIds() const;

        const std::vector<size_type> &
        getNumGhostIndicesInProcs() const;

        size_type
        getNumGhostIndicesInProc(const size_type procId) const;

        SizeTypeVector
        getGhostLocalIndices(const size_type procId) const;

        const std::vector<size_type> &
        getGhostLocalIndicesRanges() const;

        const std::vector<size_type> &
        getTargetProcIds() const;

        const std::vector<size_type> &
        getNumOwnedIndicesForTargetProcs() const;

        size_type
        getNumOwnedIndicesForTargetProc(const size_type procId) const;

        const SizeTypeVector &
        getOwnedLocalIndicesForTargetProcs() const;

        SizeTypeVector
        getOwnedLocalIndices(const size_type procId) const;

        size_type
        nmpiProcesses() const;

        size_type
        thisProcessId() const;

        global_size_type
        nGlobalIndices() const;

        const MPI_Comm &
        mpiCommunicator() const;

        bool
        isCompatible(const MPIPatternP2P<memorySpace> &rhs) const;

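        /**
         * Example usage of the accessors (a minimal sketch; `pattern` is
         * a previously constructed MPIPatternP2P, and the owned-then-ghost
         * local numbering mentioned in the comments is an assumption
         * based on the usual convention for such partitioners, not a
         * guarantee stated in this header):
         *
         * @code
         * const dftfe::size_type nOwned = pattern.localOwnedSize();
         * const dftfe::size_type nGhost = pattern.localGhostSize();
         * for (const auto g : pattern.getGhostIndices())
         *   {
         *     // assumed: ghost ids map to [nOwned, nOwned + nGhost)
         *     const dftfe::size_type        l     = pattern.globalToLocal(g);
         *     const dftfe::global_size_type gBack = pattern.localToGlobal(l);
         *     // gBack == g for any valid local id
         *   }
         * @endcode
         */
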
      private:
        /**
         * A pair \f$(a,b)\f$ which defines the range of contiguous
         * indices owned by the current processor.
         *
         * @note The range is half-open: \f$a\f$ is included,
         * but \f$b\f$ is not.
         */
        std::pair<global_size_type, global_size_type> d_locallyOwnedRange;

        /**
         * A vector of size 2 times the number of processors to store the
         * locallyOwnedRange of each processor. That is, it stores the
         * list \f$[a_0, b_0, a_1, b_1, \ldots, a_{P-1}, b_{P-1}]\f$,
         * where the pair \f$(a_i, b_i)\f$ defines the range of contiguous
         * indices owned by the \f$i\f$-th processor.
         *
         * @note Each range is half-open: \f$a_i\f$ is included but
         * \f$b_i\f$ is not.
         */
        std::vector<global_size_type> d_allOwnedRanges;
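
        /*
         * Illustration with assumed numbers: with three processors owning
         * the ranges [0, 4), [4, 9), and [9, 12), d_allOwnedRanges holds
         * {0, 4, 4, 9, 9, 12}.
         */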

        /**
         * Number of locally owned indices in the current processor.
         */
        size_type d_numLocallyOwnedIndices;

        /**
         * Number of ghost indices in the current processor.
         */
        size_type d_numGhostIndices;

        /**
         * Vector to store an ordered set of ghost indices
         * (ordered in increasing order and non-repeating).
         */
        std::vector<global_size_type> d_ghostIndices;

        /**
         * A copy of the above d_ghostIndices stored as an STL set.
         */
        std::set<global_size_type> d_ghostIndicesSetSTL;

        /**
         * An OptimizedIndexSet object to store the ghost indices for
         * efficient operations. The OptimizedIndexSet internally creates
         * contiguous sub-ranges within the set of indices and hence can
         * speed up the lookup of an index.
         */
        OptimizedIndexSet<global_size_type> d_ghostIndicesOptimizedIndexSet;

        /**
         * Number of ghost processors for the current processor. A ghost
         * processor is one which owns at least one of the ghost indices
         * of this processor.
         */
        size_type d_numGhostProcs;

        /**
         * Vector to store the ghost processor Ids. A ghost processor is
         * one which owns at least one of the ghost indices of this
         * processor.
         */
        std::vector<size_type> d_ghostProcIds;

        /**
         * Vector of size number of ghost processors to store how many
         * ghost indices of the current processor are owned by each ghost
         * processor.
         */
        std::vector<size_type> d_numGhostIndicesInGhostProcs;

        /**
         * A flattened vector of size number of ghosts containing the
         * ghost indices ordered as per the list of ghost processor Ids
         * in d_ghostProcIds. In other words, it stores a concatenation
         * of the lists
         * \f$L_i = \{g^{(k_i)}_1, g^{(k_i)}_2, \ldots, g^{(k_i)}_{N_i}\}\f$,
         * where the \f$g\f$'s are the ghost indices, \f$k_i\f$ is the
         * rank of the \f$i\f$-th ghost processor (i.e.,
         * d_ghostProcIds[i]), and \f$N_i\f$ is the number of ghost
         * indices owned by the \f$i\f$-th ghost processor (i.e.,
         * d_numGhostIndicesInGhostProcs[i]).
         *
         * @note Each \f$L_i\f$ has to be an increasing set.
         *
         * @note We store only the ghost index local to this processor,
         * i.e., the position of the ghost index in d_ghostIndicesSetSTL
         * or d_ghostIndices. This is done so as to use size_type
         * (unsigned int) instead of global_size_type (unsigned long
         * int), which reduces the volume of data transferred during MPI
         * calls.
         *
         * @note In case the locally owned ranges across all the
         * processors are ordered as per the processor Id, this vector is
         * redundant and one can work with d_ghostIndices and
         * d_numGhostIndicesInGhostProcs alone. The locally owned ranges
         * being ordered as per the processor Id means that the ranges
         * for processors \f$0, 1, \ldots, P-1\f$ are
         * \f$[N_0, N_1), [N_1, N_2), \ldots, [N_{P-1}, N_P)\f$ with
         * \f$N_0, N_1, \ldots, N_P\f$ being non-decreasing. In the more
         * general case, where the locally owned ranges are not ordered
         * as per the processor Id, this vector is needed.
         */
        SizeTypeVector d_flattenedLocalGhostIndices;

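        /*
         * Illustration with assumed numbers: if this processor's ghost
         * indices are {2, 7, 8}, with global index 2 owned by rank 1 and
         * global indices {7, 8} owned by rank 3, then
         * d_ghostProcIds = {1, 3},
         * d_numGhostIndicesInGhostProcs = {1, 2}, and
         * d_flattenedLocalGhostIndices = {0, 1, 2}, i.e., the positions
         * of 2, 7, and 8 within d_ghostIndices.
         */
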
        /**
         * @brief A vector of size 2 times the number of ghost processors
         * to store the range of local ghost indices that are owned by
         * the ghost processors. In other words, it stores the list
         * \f$L = \{a_1, b_1, a_2, b_2, \ldots, a_G, b_G\}\f$, where
         * \f$a_i\f$ and \f$b_i\f$ are the start local ghost index and
         * the one-past-the-last local ghost index of the current
         * processor that are owned by the \f$i\f$-th ghost processor
         * (i.e., d_ghostProcIds[i]). Put differently, \f$[a_i, b_i)\f$
         * forms a half-open interval, where \f$a_i\f$ is included but
         * \f$b_i\f$ is not.
         *
         * @note Given that the locally owned indices of each processor
         * are contiguous and the global ghost indices (i.e.,
         * d_ghostIndices) are ordered, it is sufficient to store just
         * the range of local ghost indices for each ghost procId. The
         * actual global ghost indices belonging to the \f$i\f$-th ghost
         * processor can be fetched from d_ghostIndices (i.e., the subset
         * of d_ghostIndices at positions \f$[a_i, b_i)\f$).
         */
        std::vector<size_type> d_localGhostIndicesRanges;
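
        /*
         * Illustration (continuing the assumed numbers above):
         * d_localGhostIndicesRanges = {0, 1, 1, 3}, i.e., local ghost
         * indices [0, 1) are owned by rank 1 and local ghost indices
         * [1, 3) are owned by rank 3.
         */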

        /**
         * Number of target processors for the current processor. A
         * target processor is one which owns at least one of the locally
         * owned indices of this processor as its ghost index.
         */
        size_type d_numTargetProcs;

        /**
         * Vector to store the target processor Ids. A target processor
         * is one which contains at least one of the locally owned
         * indices of this processor as its ghost index.
         */
        std::vector<size_type> d_targetProcIds;

        /**
         * Vector of size number of target processors to store how many
         * locally owned indices of the current processor are needed (as
         * ghost indices) by each of the target processors.
         */
        std::vector<size_type> d_numOwnedIndicesForTargetProcs;

        /**
         * Vector of size \f$\sum_i\f$ d_numOwnedIndicesForTargetProcs[i]
         * to store all the locally owned indices which other processors
         * need (i.e., which are ghost indices in other processors). It
         * is stored as a concatenation of lists, where the \f$i\f$-th
         * list is
         * \f$L_i = \{o^{(k_i)}_1, o^{(k_i)}_2, \ldots, o^{(k_i)}_{N_i}\}\f$.
         * Here the \f$o\f$'s are the owned indices needed by a target
         * processor, \f$k_i\f$ is the rank of the \f$i\f$-th target
         * processor (i.e., d_targetProcIds[i]), and \f$N_i\f$ is the
         * number of indices to be sent to the \f$i\f$-th target
         * processor (i.e., d_numOwnedIndicesForTargetProcs[i]).
         *
         * @note We store only the indices local to this processor, i.e.,
         * the relative position of the index within the locally owned
         * range of this processor. This is done so as to use size_type
         * (unsigned int) instead of global_size_type (unsigned long
         * int), which reduces the volume of data transferred during MPI
         * calls.
         *
         * @note Each list \f$L_i\f$ must be ordered.
         */
        SizeTypeVector d_flattenedLocalTargetIndices;

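        /*
         * Illustration with assumed numbers: if the locally owned range
         * is [4, 9) and rank 2 needs the owned global indices {5, 6} as
         * its ghosts, then d_targetProcIds = {2},
         * d_numOwnedIndicesForTargetProcs = {2}, and
         * d_flattenedLocalTargetIndices = {1, 2}, i.e., the positions of
         * 5 and 6 relative to the start of the owned range.
         */
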
        /// Number of processors in the MPI Communicator.
        int d_nprocs;

        /// Rank of the current processor.
        int d_myRank;

        /**
         * Total number of unique indices across all processors.
         */
        global_size_type d_nGlobalIndices;

        /// MPI Communicator object.
        MPI_Comm d_mpiComm;
      };

    } // end of namespace mpi
  }   // end of namespace utils
} // end of namespace dftfe

#include "MPIPatternP2P.t.cc"
#endif // dftfeMPIPatternP2P_h