22#ifndef dftfeMPICommunicatorP2PKernels_h
23#define dftfeMPICommunicatorP2PKernels_h
30#ifdef DFTFE_WITH_DEVICE
37 template <
typename ValueType, dftfe::utils::MemorySpace memorySpace>
52 template <
typename ValueTypeComm>
69 template <
typename ValueTypeComm>
88 template <
typename ValueTypeComm>
104 template <
typename ValueType1,
typename ValueType2>
107 const ValueType1 *type1Array,
108 ValueType2 *type2Array);
111#ifdef DFTFE_WITH_DEVICE
112 template <
typename ValueType>
117 template <
typename ValueTypeComm>
123 &ownedLocalIndicesForTargetProcs,
129 template <
typename ValueTypeComm>
135 &ownedLocalIndicesForTargetProcs,
151 template <
typename ValueTypeComm>
157 &ownedLocalIndicesForTargetProcs,
165 gatherLocallyOwnedEntriesSendBufferToTargetProcsHalfPrec(
169 &ownedLocalIndicesForTargetProcs,
177 accumAddLocallyOwnedContrRecvBufferFromTargetProcsHalfPrec(
182 &ownedLocalIndicesForTargetProcs,
196 template <
typename ValueType1,
typename ValueType2>
200 const ValueType1 *type1Array,
201 ValueType2 *type2Array,
205 copyHalfPrecArrToValueTypeArr(
209 ValueType *valueTypeArray,
213 copyValueTypeArrToHalfPrecArr(
215 const ValueType *valueTypeArray,
Definition MPICommunicatorP2PKernels.h:39
static void gatherLocallyOwnedEntriesSendBufferToTargetProcs(const MemoryStorage< ValueType, memorySpace > &dataArray, const SizeTypeVector &ownedLocalIndicesForTargetProcs, const dftfe::uInt blockSize, MemoryStorage< ValueTypeComm, memorySpace > &sendBuffer)
Function template for an architecture-adaptable gather kernel that fills the send buffer.
utils::MemoryStorage< dftfe::uInt, memorySpace > SizeTypeVector
Definition MPICommunicatorP2PKernels.h:41
static void accumInsertLocallyOwnedContrRecvBufferFromTargetProcs(const MemoryStorage< ValueTypeComm, memorySpace > &recvBuffer, const SizeTypeVector &ownedLocalIndicesForTargetProcs, const dftfe::uInt blockSize, const dftfe::uInt locallyOwnedSize, const dftfe::uInt ghostSize, MemoryStorage< ValueType, memorySpace > &dataArray)
Function template for an architecture-adaptable accumulate-insert kernel that reads from the recv buffer.
static void copyValueType1ArrToValueType2Arr(const dftfe::uInt blockSize, const ValueType1 *type1Array, ValueType2 *type2Array)
Function template for copying an array of ValueType1 (type1Array) into an array of ValueType2 (type2Array).
static void accumAddLocallyOwnedContrRecvBufferFromTargetProcs(const MemoryStorage< ValueTypeComm, memorySpace > &recvBuffer, const SizeTypeVector &ownedLocalIndicesForTargetProcs, const dftfe::uInt blockSize, const dftfe::uInt locallyOwnedSize, const dftfe::uInt ghostSize, MemoryStorage< ValueType, memorySpace > &dataArray)
Function template for an architecture-adaptable accumulate-add kernel that reads from the recv buffer.
Definition MemoryStorage.h:33
cudaStream_t deviceStream_t
Definition DeviceTypeConfig.cu.h:27
@ DEVICE
Definition MemorySpaceType.h:36
Definition pseudoPotentialToDftfeConverter.cc:34
std::uint32_t uInt
Definition TypeConfig.h:10
T type
Definition dftfeDataTypes.h:130