DFT-EFE
 
Loading...
Searching...
No Matches
MPIRequestersNBX.h
/******************************************************************************
 * Copyright (c) 2021.
 * The Regents of the University of Michigan and DFT-EFE developers.
 *
 * This file is part of the DFT-EFE code.
 *
 * DFT-EFE is free software: you can redistribute it and/or modify
 * it under the terms of the Lesser GNU General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * DFT-EFE is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the Lesser GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License at the top level of DFT-EFE distribution. If not, see
 * <https://www.gnu.org/licenses/>.
 ******************************************************************************/

/*
 * @author Bikash Kanungo
 */

#ifndef dftefeMPIRequestersNBX_h
#define dftefeMPIRequestersNBX_h

#include <utils/TypeConfig.h>
#include <utils/MPITypes.h>
#include <utils/MPIRequestersBase.h>
#include <vector>
#include <memory>
#include <set>
namespace dftefe
{
  namespace utils
  {
    namespace mpi
    {
      class MPIRequestersNBX : public MPIRequestersBase
      {
        /*
         * @brief Implements the Non-blocking Consensus (NBX) algorithm as
         * described in the following paper to determine the list of
         * requesting processors for the current processor. A brief usage
         * sketch is given in a comment following the class definition.
         * @article{hoefler2010scalable,
         *   title={Scalable communication protocols for dynamic sparse data
         *          exchange},
         *   author={Hoefler, Torsten and Siebert, Christian and Lumsdaine,
         *           Andrew},
         *   journal={ACM Sigplan Notices},
         *   volume={45},
         *   number={5},
         *   pages={159--168},
         *   year={2010},
         *   publisher={ACM New York, NY, USA}
         * }
         */

        /*
         * The following is a brief description of the typical use case. Each
         * processor has a list of target processors to which it wants to
         * send a message (think of it as a message to another processor
         * requesting some data that is owned by that processor). Similarly,
         * other processors might be requesting the current processor for
         * some of the data owned by the current processor. However, the
         * current processor has no a priori knowledge of which processors
         * will be requesting data from it. The challenge is to use the
         * current processor's list of target processors to determine the
         * current processor's requesting processors. In other words, we have
         * to use the one-way communication information to figure out its
         * dual (the other way).
         *
         * Perhaps a more concrete example might help. Let's say we have a
         * vector/array that is distributed across a set of processors. Each
         * processor owns a part of the vector. The ownership is exclusive,
         * i.e., a processor is the sole owner of its part of the vector. In
         * practice, this means that the processor owns a set of indices of
         * the vector. Additionally, the sets of owning indices are disjoint
         * across the processors, and the union of the sets across all the
         * processors gives the full set of indices of the distributed
         * vector. However, each processor also needs information on a set
         * of non-owned indices (hereafter termed ghost indices), depending
         * on the needs of the application. Given its ghost indices, the
         * current processor can easily determine the processors that own
         * them. These are termed the target processors, to which the
         * current processor has to send a request to access the ghost data.
         * Similarly, the ghost indices of some other processor might be
         * owned by this processor. In that case, the other processor will
         * send a request to the current processor to access some of its
         * data (data which is ghost to the other processor but owned by the
         * current processor). But the current processor has no a priori
         * knowledge of which processors will be requesting data from it.
         * Knowing this in advance helps the current processor prepare for
         * those requests (an illustrative sketch of determining the target
         * processors is given right after this paragraph).
         *
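         * As a hedged illustration of the previous paragraph (the names
         * ownedIndexStart, ownedIndexEnd, ghostIndices, and targetRanks are
         * hypothetical and not part of this class), the target processors
         * could be derived from the ghost indices and the per-processor
         * ownership ranges roughly as follows:
         *
         * @code
         * // ownedIndexStart[p] and ownedIndexEnd[p] bound the half-open
         * // range [ownedIndexStart[p], ownedIndexEnd[p]) of vector indices
         * // owned by rank p, assumed to be known on every processor.
         * std::vector<size_type> ownedIndexStart, ownedIndexEnd;
         * std::vector<size_type> ghostIndices;
         * std::set<size_type>    targetRanks;
         * for (const size_type ghostIndex : ghostIndices)
         *   for (size_type p = 0; p < ownedIndexStart.size(); ++p)
         *     if (ghostIndex >= ownedIndexStart[p] &&
         *         ghostIndex < ownedIndexEnd[p])
         *       {
         *         targetRanks.insert(p);
         *         break;
         *       }
         * // targetRanks now holds this processor's target processors. The
         * // dual question, i.e., which processors will request data from
         * // this processor, is what the NBX algorithm answers.
         * @endcode
         *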
         * In cases of sparse communication, that is, where each processor
         * only needs to communicate with a small subset of the total number
         * of processors, the NBX algorithm determines the list of requesting
         * processors with O(log(P)) complexity (where P is the number of
         * processors). The algorithm works as follows:
         *
         * 1. The current processor sends a nonblocking synchronous message
         * (i.e., MPI_Issend) to all its target processors. Remember that the
         * current processor already has information about its target
         * processors. Also, note that the status of a nonblocking
         * synchronous send turns to "completed" only when the message has
         * been received by the receiving processor. Let's call this
         * operation the "local-send", as we are sending requests to target
         * processors that are locally known to the current processor.
         *
         * 2. The current processor keeps doing a nonblocking probe for
         * incoming messages (i.e., MPI_Iprobe). MPI_Iprobe checks whether
         * there is an incoming message matching a given source and tag. The
         * source is the rank of the processor sending the message and the
         * tag is an MPI tag associated with the exchange. It does not
         * initiate any receive operation; it only verifies whether there is
         * something to be received. For our purpose, we use the wildcards
         * MPI_ANY_SOURCE and MPI_ANY_TAG, as we just want to know whether
         * there is an incoming message. In the event that there is an
         * incoming message (i.e., MPI_Iprobe's flag is true), we extract the
         * source processor from the status handle of the MPI_Iprobe and
         * append it to a list that stores the requesting processor IDs.
         * Additionally, in that event we call a nonblocking receive (i.e.,
         * MPI_Irecv) to initiate the actual reception of the incoming
         * message. The MPI_Irecv, in turn, will complete the status of the
         * source processor's MPI_Issend through which the incoming message
         * was sent to the current processor. Thus, we achieve two things
         * here: we detected a requesting processor, and we signaled the
         * requesting processor that we have received its message. But this
         * is only half the job done. How do we tell the current processor
         * to stop probing for incoming messages? And how do we inform all
         * the processors involved that all the incoming messages across all
         * the processors have been received? This kind of problem is called
         * a Consensus Problem
         * (https://en.wikipedia.org/wiki/Consensus_(computer_science)).
         * The way consensus is reached in NBX is a two-step process:
         * (a) The current processor checks whether all its "local-send"
         * operations (see #1 above) have been received, i.e., whether the
         * status handles of all its MPI_Issend calls have turned to
         * completed. If all the "local-send" have been completed, we
         * initiate a nonblocking barrier (i.e., MPI_Ibarrier) on the current
         * processor. This informs the network that the current processor
         * has witnessed its part of an event (in this case, the event is
         * the completion of all its "local-send").
         * (b) The above only informs the network that all the "local-send"
         * of the current processor have been received. The current
         * processor can still have incoming messages to be received. Hence,
         * the current processor keeps probing and receiving incoming
         * messages until the nonblocking barrier (MPI_Ibarrier) mentioned
         * in (a) has been invoked by all the processors. This can be
         * checked from the status handle of the MPI_Ibarrier, which
         * completes only when all processors call it. At the stage when the
         * status of the MPI_Ibarrier turns to completed, we know for sure
         * that all the "local-send" of all the processors have been
         * received and that there are no more incoming messages in any
         * processor to be received. Thus, we can safely terminate the
         * nonblocking probe on all processors. A schematic sketch of this
         * loop in terms of raw MPI calls is given in a separate comment
         * block right after this one.
         *
         * @note Since we are only interested in knowing the requesting
         * processors for the current processor, we only need token MPI sends
         * and receives (e.g., just an integer across) instead of large
         * chunks of data. To that end, we hardcode all the send and receive
         * buffers to be of integer type.
         */
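
        /*
         * The following is a minimal, self-contained sketch of the NBX loop
         * described above, written directly in terms of raw MPI calls. It is
         * only an illustration of the protocol and not the implementation
         * used by this class (in particular, it uses a blocking MPI_Recv
         * after the probe for brevity, whereas this class keeps nonblocking
         * MPI_Irecv requests); the function and variable names are chosen
         * for exposition only.
         *
         * @code
         * #include <mpi.h>
         * #include <cstddef>
         * #include <set>
         * #include <vector>
         *
         * std::set<int>
         * nbxRequestingRanks(const std::vector<int> &targetRanks,
         *                    const int               tag,
         *                    const MPI_Comm          comm)
         * {
         *   // 1. Nonblocking synchronous "local-send" of a token message to
         *   // each target rank.
         *   std::vector<int>         sendBufs(targetRanks.size(), 0);
         *   std::vector<MPI_Request> sendReqs(targetRanks.size());
         *   for (std::size_t i = 0; i < targetRanks.size(); ++i)
         *     MPI_Issend(&sendBufs[i], 1, MPI_INT, targetRanks[i], tag, comm,
         *                &sendReqs[i]);
         *
         *   // 2. Keep probing and receiving until consensus is reached via
         *   // the nonblocking barrier.
         *   std::set<int> requestingRanks;
         *   MPI_Request   barrierReq;
         *   bool          barrierStarted = false;
         *   while (true)
         *     {
         *       int        flag = 0;
         *       MPI_Status status;
         *       MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);
         *       if (flag)
         *         {
         *           // Record the requester and receive its token, which
         *           // also completes the requester's MPI_Issend.
         *           requestingRanks.insert(status.MPI_SOURCE);
         *           int recvBuf;
         *           MPI_Recv(&recvBuf, 1, MPI_INT, status.MPI_SOURCE, tag,
         *                    comm, MPI_STATUS_IGNORE);
         *         }
         *       if (!barrierStarted)
         *         {
         *           // Once all local sends have been received, join the
         *           // nonblocking barrier.
         *           int allSendsDone = 0;
         *           MPI_Testall(static_cast<int>(sendReqs.size()),
         *                       sendReqs.data(), &allSendsDone,
         *                       MPI_STATUSES_IGNORE);
         *           if (allSendsDone)
         *             {
         *               MPI_Ibarrier(comm, &barrierReq);
         *               barrierStarted = true;
         *             }
         *         }
         *       else
         *         {
         *           // The barrier completes only after every processor has
         *           // joined it, i.e., after every message has been
         *           // received.
         *           int barrierDone = 0;
         *           MPI_Test(&barrierReq, &barrierDone, MPI_STATUS_IGNORE);
         *           if (barrierDone)
         *             break;
         *         }
         *     }
         *   return requestingRanks;
         * }
         * @endcode
         */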

      public:
        MPIRequestersNBX(const std::vector<size_type> &targetIDs,
                         const MPIComm &               comm);
        //
        // Default constructor for serial (without MPI) compilation
        //
        MPIRequestersNBX() = default;

        std::vector<size_type>
        getRequestingRankIds() override;

      private:
        // List of target processors to which requests are sent
        std::vector<size_type> d_targetIDs;

        // Send buffers (token integer messages) for the target processors
        std::vector<int> d_sendBuffers;

        // Requests for the nonblocking sends to the target processors
        std::vector<MPIRequest> d_sendRequests;

        // Receive buffers for incoming messages; pointers keep the buffer
        // addresses stable while the nonblocking receives are in flight
        std::vector<std::unique_ptr<int>> d_recvBuffers;

        // Requests for the nonblocking receives of incoming messages
        std::vector<std::unique_ptr<MPIRequest>> d_recvRequests;

        //
        // request for barrier
        //
        MPIRequest d_barrierRequest;

        //
        // MPI communicator
        //
        const MPIComm &d_comm;

        //
        // set of processors that have requested data from the current
        // processor
        //
        std::set<size_type> d_requestingProcesses;

        int d_numProcessors;
        int d_myRank;

        // Checks whether all the "local-send" operations have been received
        bool
        haveAllLocalSendReceived();

        // Signals, via the nonblocking barrier, that all the "local-send"
        // of the current processor have been received
        void
        signalLocalSendCompletion();

        // Checks whether the nonblocking barrier has completed on all
        // processors, i.e., there are no more incoming messages to receive
        bool
        haveAllIncomingMsgsReceived();

        // Probes for an incoming message and, if present, receives it and
        // records the source as a requesting processor
        void
        probeAndReceiveIncomingMsg();

        // Initiates the nonblocking synchronous sends to the target
        // processors (the "local-send")
        void
        startLocalSend();

        // Completes any outstanding MPI requests and performs cleanup
        void
        finish();
      };
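
      /*
       * A hedged usage sketch (assuming an MPI build in which MPIComm wraps
       * MPI_Comm, and that size_type is the project-wide index type from
       * utils/TypeConfig.h): every processor passes the ranks it will send
       * requests to and gets back the ranks that will send requests to it.
       *
       * @code
       * // Ranks this processor needs data from (application specific),
       * // e.g., ranks 1 and 3.
       * std::vector<size_type> targetIDs = {1, 3};
       *
       * MPIRequestersNBX nbx(targetIDs, MPI_COMM_WORLD);
       *
       * // Ranks that will be requesting data from this processor.
       * std::vector<size_type> requestingRanks = nbx.getRequestingRankIds();
       * @endcode
       */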

    } // end of namespace mpi
  }   // end of namespace utils
}     // end of namespace dftefe
#endif // dftefeMPIRequestersNBX_h