27#if defined(DFTFE_WITH_DEVICE)
34 template <dftfe::utils::MemorySpace memorySpace>
43 template <
typename ValueType>
48 ValueType *output)
const;
50 template <
typename ValueType>
55 ValueType *output)
const;
79 const std::complex<float> *alpha,
80 const std::complex<float> *A,
82 const std::complex<float> *B,
84 const std::complex<float> *beta,
85 std::complex<float> *C,
112 const std::complex<double> *alpha,
113 const std::complex<double> *A,
115 const std::complex<double> *B,
117 const std::complex<double> *beta,
118 std::complex<double> *C,
151 const std::complex<double> *alpha,
152 const std::complex<double> *A,
154 const std::complex<double> *x,
156 const std::complex<double> *beta,
157 std::complex<double> *y,
164 const std::complex<float> *alpha,
165 const std::complex<float> *A,
167 const std::complex<float> *x,
169 const std::complex<float> *beta,
170 std::complex<float> *y,
174 template <
typename ValueType1,
typename ValueType2>
187 template <
typename ValueType>
191 const ValueType *input1,
192 const ValueType *input2,
203 const MPI_Comm &mpi_communicator,
204 double *result)
const;
210 const std::complex<double> *x,
212 const MPI_Comm &mpi_communicator,
213 double *result)
const;
221 double *result)
const;
229 const MPI_Comm &mpi_communicator,
230 double *result)
const;
235 const std::complex<double> *X,
237 const std::complex<double> *Y,
239 std::complex<double> *result)
const;
244 const std::complex<double> *X,
246 const std::complex<double> *Y,
248 const MPI_Comm &mpi_communicator,
249 std::complex<double> *result)
const;
253 template <
typename ValueType>
259 const ValueType *onesVec,
260 ValueType *tempVector,
261 ValueType *tempResults,
262 ValueType *result)
const;
265 template <
typename ValueType>
271 const ValueType *onesVec,
272 ValueType *tempVector,
273 ValueType *tempResults,
274 const MPI_Comm &mpi_communicator,
275 ValueType *result)
const;
290 const std::complex<double> *alpha,
291 const std::complex<double> *x,
293 std::complex<double> *y,
308 const std::complex<float> *alpha,
309 const std::complex<float> *x,
311 std::complex<float> *y,
325 const std::complex<double> *x,
327 std::complex<double> *y,
341 const std::complex<float> *x,
343 std::complex<float> *y,
381 const std::complex<double> *alpha,
382 const std::complex<double> *A[],
384 const std::complex<double> *B[],
386 const std::complex<double> *beta,
387 std::complex<double> *C[],
414 const std::complex<float> *alpha,
415 const std::complex<float> *A[],
417 const std::complex<float> *B[],
419 const std::complex<float> *beta,
420 std::complex<float> *C[],
434 long long int strideA,
437 long long int strideB,
441 long long int strideC,
450 const std::complex<double> *alpha,
451 const std::complex<double> *A,
453 long long int strideA,
454 const std::complex<double> *B,
456 long long int strideB,
457 const std::complex<double> *beta,
458 std::complex<double> *C,
460 long long int strideC,
469 const std::complex<float> *alpha,
470 const std::complex<float> *A,
472 long long int strideA,
473 const std::complex<float> *B,
475 long long int strideB,
476 const std::complex<float> *beta,
477 std::complex<float> *C,
479 long long int strideC,
491 long long int strideA,
494 long long int strideB,
498 long long int strideC,
501 template <
typename ValueTypeComplex,
typename ValueTypeReal>
504 const ValueTypeComplex *complexArr,
505 ValueTypeReal *realArr,
506 ValueTypeReal *imagArr);
509 template <
typename ValueTypeComplex,
typename ValueTypeReal>
512 const ValueTypeReal *realArr,
513 const ValueTypeReal *imagArr,
514 ValueTypeComplex *complexArr);
516 template <
typename ValueType1,
typename ValueType2>
519 const ValueType1 *valueType1Arr,
520 ValueType2 *valueType2Arr);
523 template <
typename ValueType1,
typename ValueType2>
528 const ValueType1 *copyFromVec,
529 ValueType2 *copyToVecBlock,
530 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
533 template <
typename ValueType1,
typename ValueType2>
539 const ValueType1 *copyFromVec,
540 ValueType2 *copyToVecBlock,
541 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
543 template <
typename ValueType1,
typename ValueType2>
548 const ValueType1 *copyFromVecBlock,
549 ValueType2 *copyToVec,
550 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
552 template <
typename ValueType1,
typename ValueType2>
558 const ValueType1 *copyFromVec,
559 ValueType2 *copyToVec)
const;
562 template <
typename ValueType1,
typename ValueType2>
570 const ValueType1 *copyFromVec,
571 ValueType2 *copyToVec);
574 template <
typename ValueType1,
typename ValueType2>
580 const ValueType1 *copyFromVec,
581 ValueType2 *copyToVec);
583 template <
typename ValueType1,
typename ValueType2>
587 const ValueType1 *addFromVec,
588 const ValueType2 *scalingVector,
590 ValueType1 *addToVec)
const;
593 template <
typename ValueType1,
typename ValueType2>
597 const ValueType1 *addFromVec,
598 const ValueType2 *scalingVector,
601 ValueType1 *addToVec)
const;
602 template <
typename ValueType1,
typename ValueType2>
605 const ValueType2 alpha,
607 const ValueType2 beta,
608 ValueType1 *y)
const;
609 template <
typename ValueType0,
617 const ValueType0 alpha,
621 ValueType4 *C)
const;
623 template <
typename ValueType>
628 const ValueType *addFromVec,
630 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
632 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
639 const ValueType2 *addFromVec,
640 ValueType3 *addToVec,
641 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
642 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
648 const ValueType2 *addFromVec,
649 ValueType3 *addToVec,
650 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
652 template <
typename ValueType1,
typename ValueType2>
660 template <
typename ValueType1,
typename ValueType2>
667 const ValueType2 *copyFromVec,
668 ValueType2 *copyToVecBlock,
669 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
671 template <
typename ValueType>
675 const ValueType *beta,
678 template <
typename ValueType>
683 const ValueType *beta,
686 template <
typename ValueType>
692 const ValueType *alpha,
694 const ValueType *beta,
697 template <
typename ValueType1,
typename ValueType2>
706#if defined(DFTFE_WITH_DEVICE)
708 enum class tensorOpDataType
722 template <
typename ValueType1,
typename ValueType2>
724 copyValueType1ArrToValueType2ArrDeviceCall(
726 const ValueType1 *valueType1Arr,
727 ValueType2 *valueType2Arr,
730 template <
typename ValueType>
735 ValueType *output)
const;
737 template <
typename ValueType>
742 ValueType *output)
const;
746 xgemm(
const char transA,
761 xgemm(
const char transA,
766 const std::complex<float> *alpha,
767 const std::complex<float> *A,
769 const std::complex<float> *B,
771 const std::complex<float> *beta,
772 std::complex<float> *C,
777 xgemm(
const char transA,
794 xgemm(
const char transA,
799 const std::complex<double> *alpha,
800 const std::complex<double> *A,
802 const std::complex<double> *B,
804 const std::complex<double> *beta,
805 std::complex<double> *C,
810 xgemv(
const char transA,
823 xgemv(
const char transA,
836 xgemv(
const char transA,
839 const std::complex<double> *alpha,
840 const std::complex<double> *A,
842 const std::complex<double> *x,
844 const std::complex<double> *beta,
845 std::complex<double> *y,
849 xgemv(
const char transA,
852 const std::complex<float> *alpha,
853 const std::complex<float> *A,
855 const std::complex<float> *x,
857 const std::complex<float> *beta,
858 std::complex<float> *y,
861 template <
typename ValueType>
863 addVecOverContinuousIndex(
const dftfe::uInt numContiguousBlocks,
865 const ValueType *input1,
866 const ValueType *input2,
871 template <
typename ValueType1,
typename ValueType2>
873 xscal(ValueType1 *x,
const ValueType2 alpha,
const dftfe::uInt n)
const;
882 const MPI_Comm &mpi_communicator,
883 double *result)
const;
889 const std::complex<double> *x,
891 const MPI_Comm &mpi_communicator,
892 double *result)
const;
901 double *result)
const;
911 const MPI_Comm &mpi_communicator,
912 double *result)
const;
917 const std::complex<double> *X,
919 const std::complex<double> *Y,
921 std::complex<double> *result)
const;
926 const std::complex<double> *X,
928 const std::complex<double> *Y,
930 const MPI_Comm &mpi_communicator,
931 std::complex<double> *result)
const;
934 template <
typename ValueType>
936 MultiVectorXDot(
const dftfe::uInt contiguousBlockSize,
940 const ValueType *onesVec,
941 ValueType *tempVector,
942 ValueType *tempResults,
943 ValueType *result)
const;
945 template <
typename ValueType>
947 MultiVectorXDot(
const dftfe::uInt contiguousBlockSize,
951 const ValueType *onesVec,
952 ValueType *tempVector,
953 ValueType *tempResults,
954 const MPI_Comm &mpi_communicator,
955 ValueType *result)
const;
969 const std::complex<double> *alpha,
970 const std::complex<double> *x,
972 std::complex<double> *y,
986 const std::complex<double> *x,
988 std::complex<double> *y,
1002 const std::complex<float> *x,
1004 std::complex<float> *y,
1009 xsymv(
const char UPLO,
1011 const double *alpha,
1021 xgemmBatched(
const char transA,
1026 const double *alpha,
1037 xgemmBatched(
const char transA,
1042 const std::complex<double> *alpha,
1043 const std::complex<double> *A[],
1045 const std::complex<double> *B[],
1047 const std::complex<double> *beta,
1048 std::complex<double> *C[],
1053 xgemmBatched(
const char transA,
1069 xgemmBatched(
const char transA,
1074 const std::complex<float> *alpha,
1075 const std::complex<float> *A[],
1077 const std::complex<float> *B[],
1079 const std::complex<float> *beta,
1080 std::complex<float> *C[],
1085 xgemmStridedBatched(
const char transA,
1090 const double *alpha,
1093 long long int strideA,
1096 long long int strideB,
1100 long long int strideC,
1104 xgemmStridedBatched(
const char transA,
1109 const std::complex<double> *alpha,
1110 const std::complex<double> *A,
1112 long long int strideA,
1113 const std::complex<double> *B,
1115 long long int strideB,
1116 const std::complex<double> *beta,
1117 std::complex<double> *C,
1119 long long int strideC,
1123 xgemmStridedBatched(
const char transA,
1128 const std::complex<float> *alpha,
1129 const std::complex<float> *A,
1131 long long int strideA,
1132 const std::complex<float> *B,
1134 long long int strideB,
1135 const std::complex<float> *beta,
1136 std::complex<float> *C,
1138 long long int strideC,
1142 xgemmStridedBatched(
const char transA,
1150 long long int strideA,
1153 long long int strideB,
1157 long long int strideC,
1160 template <
typename ValueTypeComplex,
typename ValueTypeReal>
1163 const ValueTypeComplex *complexArr,
1164 ValueTypeReal *realArr,
1165 ValueTypeReal *imagArr);
1168 template <
typename ValueTypeComplex,
typename ValueTypeReal>
1171 const ValueTypeReal *realArr,
1172 const ValueTypeReal *imagArr,
1173 ValueTypeComplex *complexArr);
1175 template <
typename ValueType1,
typename ValueType2>
1177 copyValueType1ArrToValueType2Arr(
const dftfe::uInt size,
1178 const ValueType1 *valueType1Arr,
1179 ValueType2 *valueType2Arr);
1182 template <
typename ValueType1,
typename ValueType2>
1187 const ValueType1 *copyFromVec,
1188 ValueType2 *copyToVecBlock,
1189 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
1191 template <
typename ValueType1,
typename ValueType2>
1197 const ValueType1 *copyFromVec,
1198 ValueType2 *copyToVecBlock,
1199 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
1202 template <
typename ValueType1,
typename ValueType2>
1204 stridedCopyFromBlock(
1207 const ValueType1 *copyFromVecBlock,
1208 ValueType2 *copyToVec,
1209 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
1211 template <
typename ValueType1,
typename ValueType2>
1213 stridedCopyToBlockConstantStride(
const dftfe::uInt blockSizeTo,
1217 const ValueType1 *copyFromVec,
1218 ValueType2 *copyToVec)
const;
1221 template <
typename ValueType1,
typename ValueType2>
1223 stridedCopyConstantStride(
const dftfe::uInt blockSize,
1229 const ValueType1 *copyFromVec,
1230 ValueType2 *copyToVec);
1233 template <
typename ValueType1,
typename ValueType2>
1235 stridedCopyFromBlockConstantStride(
const dftfe::uInt blockSizeTo,
1239 const ValueType1 *copyFromVec,
1240 ValueType2 *copyToVec);
1241 template <
typename ValueType1,
typename ValueType2>
1244 const ValueType2 alpha,
1245 const ValueType1 *x,
1246 const ValueType2 beta,
1247 ValueType1 *y)
const;
1249 template <
typename ValueType1,
typename ValueType2>
1251 stridedBlockAxpy(
const dftfe::uInt contiguousBlockSize,
1253 const ValueType1 *addFromVec,
1254 const ValueType2 *scalingVector,
1256 ValueType1 *addToVec)
const;
1257 template <
typename ValueType1,
typename ValueType2>
1259 stridedBlockAxpBy(
const dftfe::uInt contiguousBlockSize,
1261 const ValueType1 *addFromVec,
1262 const ValueType2 *scalingVector,
1265 ValueType1 *addToVec)
const;
1267 template <
typename ValueType0,
1268 typename ValueType1,
1269 typename ValueType2,
1270 typename ValueType3,
1271 typename ValueType4>
1275 const ValueType0 alpha,
1276 const ValueType1 *A,
1277 const ValueType2 *B,
1278 const ValueType3 *D,
1279 ValueType4 *C)
const;
1282 template <
typename ValueType>
1284 axpyStridedBlockAtomicAdd(
1287 const ValueType *addFromVec,
1288 ValueType *addToVec,
1289 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
1291 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
1293 axpyStridedBlockAtomicAdd(
1297 const ValueType1 *s,
1298 const ValueType2 *addFromVec,
1299 ValueType3 *addToVec,
1300 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
1301 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
1303 axpyStridedBlockAtomicAdd(
1307 const ValueType2 *addFromVec,
1308 ValueType3 *addToVec,
1309 const dftfe::uInt *addToVecStartingContiguousBlockIds)
const;
1311 template <
typename ValueType1,
typename ValueType2>
1313 stridedBlockScale(
const dftfe::uInt contiguousBlockSize,
1316 const ValueType1 *s,
1318 template <
typename ValueType1,
typename ValueType2>
1320 stridedBlockScaleCopy(
1324 const ValueType1 *s,
1325 const ValueType2 *copyFromVec,
1326 ValueType2 *copyToVecBlock,
1327 const dftfe::uInt *copyFromVecStartingContiguousBlockIds);
1329 template <
typename ValueType>
1331 stridedBlockScaleColumnWise(
const dftfe::uInt contiguousBlockSize,
1333 const ValueType *beta,
1336 template <
typename ValueType>
1338 stridedBlockScaleAndAddColumnWise(
const dftfe::uInt contiguousBlockSize,
1341 const ValueType *beta,
1344 template <
typename ValueType>
1346 stridedBlockScaleAndAddTwoVecColumnWise(
1350 const ValueType *alpha,
1352 const ValueType *beta,
1355 template <
typename ValueType1,
typename ValueType2>
1357 rightDiagonalScale(
const dftfe::uInt numberofVectors,
1363 getDeviceBlasHandle();
1366 template <
typename ValueType1,
typename ValueType2>
1368 copyBlockDiagonalValueType1OffDiagonalValueType2FromValueType1Arr(
1372 const ValueType1 *valueType1SrcArray,
1373 ValueType1 *valueType1DstArray,
1374 ValueType2 *valueType2DstArray);
1377 setTensorOpDataType(tensorOpDataType opType)
1386# ifdef DFTFE_WITH_DEVICE_AMD
1394 tensorOpDataType d_opType;
void xcopy(const dftfe::uInt n, const float *x, const dftfe::uInt incx, float *y, const dftfe::uInt incy) const
void xaxpy(const dftfe::uInt n, const std::complex< float > *alpha, const std::complex< float > *x, const dftfe::uInt incx, std::complex< float > *y, const dftfe::uInt incy) const
void stridedCopyToBlock(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const dftfe::uInt startingVecId, const ValueType1 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::uInt *copyFromVecStartingContiguousBlockIds)
void stridedBlockAxpy(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 *addFromVec, const ValueType2 *scalingVector, const ValueType2 a, ValueType1 *addToVec) const
void axpyStridedBlockAtomicAdd(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 a, const ValueType2 *addFromVec, ValueType3 *addToVec, const dftfe::uInt *addToVecStartingContiguousBlockIds) const
void hadamardProduct(const dftfe::uInt m, const ValueType *X, const ValueType *Y, ValueType *output) const
void xgemv(const char transA, const dftfe::uInt m, const dftfe::uInt n, const std::complex< float > *alpha, const std::complex< float > *A, const dftfe::uInt lda, const std::complex< float > *x, const dftfe::uInt incx, const std::complex< float > *beta, std::complex< float > *y, const dftfe::uInt incy) const
void xgemmStridedBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< double > *alpha, const std::complex< double > *A, const dftfe::uInt lda, long long int strideA, const std::complex< double > *B, const dftfe::uInt ldb, long long int strideB, const std::complex< double > *beta, std::complex< double > *C, const dftfe::uInt ldc, long long int strideC, const dftfe::Int batchCount) const
void stridedBlockScaleCopy(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 a, const ValueType1 *s, const ValueType2 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::uInt *copyFromVecStartingContiguousBlockIds)
void xnrm2(const dftfe::uInt n, const double *x, const dftfe::uInt incx, const MPI_Comm &mpi_communicator, double *result) const
void xgemmBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const double *alpha, const double *A[], const dftfe::uInt lda, const double *B[], const dftfe::uInt ldb, const double *beta, double *C[], const dftfe::uInt ldc, const dftfe::Int batchCount) const
void xscal(ValueType1 *x, const ValueType2 alpha, const dftfe::uInt n) const
void copyValueType1ArrToValueType2Arr(const dftfe::uInt size, const ValueType1 *valueType1Arr, ValueType2 *valueType2Arr)
void xgemm(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const double *alpha, const double *A, const dftfe::uInt lda, const double *B, const dftfe::uInt ldb, const double *beta, double *C, const dftfe::uInt ldc) const
void xdot(const dftfe::uInt N, const double *X, const dftfe::uInt INCX, const double *Y, const dftfe::uInt INCY, const MPI_Comm &mpi_communicator, double *result) const
void hadamardProductWithConj(const dftfe::uInt m, const ValueType *X, const ValueType *Y, ValueType *output) const
void xgemmBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< float > *alpha, const std::complex< float > *A[], const dftfe::uInt lda, const std::complex< float > *B[], const dftfe::uInt ldb, const std::complex< float > *beta, std::complex< float > *C[], const dftfe::uInt ldc, const dftfe::Int batchCount) const
void xgemv(const char transA, const dftfe::uInt m, const dftfe::uInt n, const float *alpha, const float *A, const dftfe::uInt lda, const float *x, const dftfe::uInt incx, const float *beta, float *y, const dftfe::uInt incy) const
void MultiVectorXDot(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *X, const ValueType *Y, const ValueType *onesVec, ValueType *tempVector, ValueType *tempResults, ValueType *result) const
void MultiVectorXDot(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *X, const ValueType *Y, const ValueType *onesVec, ValueType *tempVector, ValueType *tempResults, const MPI_Comm &mpi_communicator, ValueType *result) const
void xsymv(const char UPLO, const dftfe::uInt N, const double *alpha, const double *A, const dftfe::uInt LDA, const double *X, const dftfe::uInt INCX, const double *beta, double *C, const dftfe::uInt INCY) const
void copyComplexArrToRealArrs(const dftfe::uInt size, const ValueTypeComplex *complexArr, ValueTypeReal *realArr, ValueTypeReal *imagArr)
void stridedBlockScale(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 a, const ValueType1 *s, ValueType2 *x)
void xdot(const dftfe::uInt N, const std::complex< double > *X, const dftfe::uInt INCX, const std::complex< double > *Y, const dftfe::uInt INCY, const MPI_Comm &mpi_communicator, std::complex< double > *result) const
void xgemmStridedBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< float > *alpha, const std::complex< float > *A, const dftfe::uInt lda, long long int strideA, const std::complex< float > *B, const dftfe::uInt ldb, long long int strideB, const std::complex< float > *beta, std::complex< float > *C, const dftfe::uInt ldc, long long int strideC, const dftfe::Int batchCount) const
void xgemm(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< double > *alpha, const std::complex< double > *A, const dftfe::uInt lda, const std::complex< double > *B, const dftfe::uInt ldb, const std::complex< double > *beta, std::complex< double > *C, const dftfe::uInt ldc) const
void xcopy(const dftfe::uInt n, const double *x, const dftfe::uInt incx, double *y, const dftfe::uInt incy) const
void xdot(const dftfe::uInt N, const std::complex< double > *X, const dftfe::uInt INCX, const std::complex< double > *Y, const dftfe::uInt INCY, std::complex< double > *result) const
void xgemmStridedBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const float *alpha, const float *A, const dftfe::uInt lda, long long int strideA, const float *B, const dftfe::uInt ldb, long long int strideB, const float *beta, float *C, const dftfe::uInt ldc, long long int strideC, const dftfe::Int batchCount) const
void axpby(const dftfe::uInt n, const ValueType2 alpha, const ValueType1 *x, const ValueType2 beta, ValueType1 *y) const
void axpyStridedBlockAtomicAdd(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 a, const ValueType1 *s, const ValueType2 *addFromVec, ValueType3 *addToVec, const dftfe::uInt *addToVecStartingContiguousBlockIds) const
void xaxpy(const dftfe::uInt n, const float *alpha, const float *x, const dftfe::uInt incx, float *y, const dftfe::uInt incy) const
void xgemm(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const float *alpha, const float *A, const dftfe::uInt lda, const float *B, const dftfe::uInt ldb, const float *beta, float *C, const dftfe::uInt ldc) const
void stridedCopyToBlock(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::uInt *copyFromVecStartingContiguousBlockIds)
void stridedCopyFromBlock(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 *copyFromVecBlock, ValueType2 *copyToVec, const dftfe::uInt *copyFromVecStartingContiguousBlockIds)
void stridedCopyToBlockConstantStride(const dftfe::uInt blockSizeTo, const dftfe::uInt blockSizeFrom, const dftfe::uInt numBlocks, const dftfe::uInt startingId, const ValueType1 *copyFromVec, ValueType2 *copyToVec) const
void stridedBlockScaleAndAddTwoVecColumnWise(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *x, const ValueType *alpha, const ValueType *y, const ValueType *beta, ValueType *z)
void stridedBlockAxpBy(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType1 *addFromVec, const ValueType2 *scalingVector, const ValueType2 a, const ValueType2 b, ValueType1 *addToVec) const
void xnrm2(const dftfe::uInt n, const std::complex< double > *x, const dftfe::uInt incx, const MPI_Comm &mpi_communicator, double *result) const
void stridedBlockScaleColumnWise(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *beta, ValueType *x)
void xgemmStridedBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const double *alpha, const double *A, const dftfe::uInt lda, long long int strideA, const double *B, const dftfe::uInt ldb, long long int strideB, const double *beta, double *C, const dftfe::uInt ldc, long long int strideC, const dftfe::Int batchCount) const
void xgemmBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< double > *alpha, const std::complex< double > *A[], const dftfe::uInt lda, const std::complex< double > *B[], const dftfe::uInt ldb, const std::complex< double > *beta, std::complex< double > *C[], const dftfe::uInt ldc, const dftfe::Int batchCount) const
void stridedCopyFromBlockConstantStride(const dftfe::uInt blockSizeTo, const dftfe::uInt blockSizeFrom, const dftfe::uInt numBlocks, const dftfe::uInt startingId, const ValueType1 *copyFromVec, ValueType2 *copyToVec)
void stridedBlockScaleAndAddColumnWise(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *x, const ValueType *beta, ValueType *y)
void ApaBD(const dftfe::uInt m, const dftfe::uInt n, const ValueType0 alpha, const ValueType1 *A, const ValueType2 *B, const ValueType3 *D, ValueType4 *C) const
void xaxpy(const dftfe::uInt n, const std::complex< double > *alpha, const std::complex< double > *x, const dftfe::uInt incx, std::complex< double > *y, const dftfe::uInt incy) const
void xgemv(const char transA, const dftfe::uInt m, const dftfe::uInt n, const double *alpha, const double *A, const dftfe::uInt lda, const double *x, const dftfe::uInt incx, const double *beta, double *y, const dftfe::uInt incy) const
void stridedCopyConstantStride(const dftfe::uInt blockSize, const dftfe::uInt strideTo, const dftfe::uInt strideFrom, const dftfe::uInt numBlocks, const dftfe::uInt startingToId, const dftfe::uInt startingFromId, const ValueType1 *copyFromVec, ValueType2 *copyToVec)
void xgemmBatched(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const float *alpha, const float *A[], const dftfe::uInt lda, const float *B[], const dftfe::uInt ldb, const float *beta, float *C[], const dftfe::uInt ldc, const dftfe::Int batchCount) const
void addVecOverContinuousIndex(const dftfe::uInt numContiguousBlocks, const dftfe::uInt contiguousBlockSize, const ValueType *input1, const ValueType *input2, ValueType *output)
void xaxpy(const dftfe::uInt n, const double *alpha, const double *x, const dftfe::uInt incx, double *y, const dftfe::uInt incy) const
void axpyStridedBlockAtomicAdd(const dftfe::uInt contiguousBlockSize, const dftfe::uInt numContiguousBlocks, const ValueType *addFromVec, ValueType *addToVec, const dftfe::uInt *addToVecStartingContiguousBlockIds) const
void rightDiagonalScale(const dftfe::uInt numberofVectors, const dftfe::uInt sizeOfVector, ValueType1 *X, ValueType2 *D)
void xgemv(const char transA, const dftfe::uInt m, const dftfe::uInt n, const std::complex< double > *alpha, const std::complex< double > *A, const dftfe::uInt lda, const std::complex< double > *x, const dftfe::uInt incx, const std::complex< double > *beta, std::complex< double > *y, const dftfe::uInt incy) const
void xcopy(const dftfe::uInt n, const std::complex< float > *x, const dftfe::uInt incx, std::complex< float > *y, const dftfe::uInt incy) const
void copyRealArrsToComplexArr(const dftfe::uInt size, const ValueTypeReal *realArr, const ValueTypeReal *imagArr, ValueTypeComplex *complexArr)
void xgemm(const char transA, const char transB, const dftfe::uInt m, const dftfe::uInt n, const dftfe::uInt k, const std::complex< float > *alpha, const std::complex< float > *A, const dftfe::uInt lda, const std::complex< float > *B, const dftfe::uInt ldb, const std::complex< float > *beta, std::complex< float > *C, const dftfe::uInt ldc) const
void xdot(const dftfe::uInt N, const double *X, const dftfe::uInt INCX, const double *Y, const dftfe::uInt INCY, double *result) const
void xcopy(const dftfe::uInt n, const std::complex< double > *x, const dftfe::uInt incx, std::complex< double > *y, const dftfe::uInt incy) const
Definition BLASWrapper.h:35
Definition BLASWrapper.h:33
cudaStream_t deviceStream_t
Definition DeviceTypeConfig.cu.h:27
cublasStatus_t deviceBlasStatus_t
Definition DeviceTypeConfig.cu.h:38
@ HOST
Definition MemorySpaceType.h:34
@ DEVICE
Definition MemorySpaceType.h:36
cublasHandle_t deviceBlasHandle_t
Definition DeviceTypeConfig.cu.h:36
Definition pseudoPotentialToDftfeConverter.cc:34
std::uint32_t uInt
Definition TypeConfig.h:10
@ LDA
Definition ExcSSDFunctionalBaseClass.h:35
std::int32_t Int
Definition TypeConfig.h:11