27#if defined(DFTFE_WITH_DEVICE)
34 template <dftfe::utils::MemorySpace memorySpace>
43 template <
typename ValueType>
48 ValueType * output)
const;
50 template <
typename ValueType>
55 ValueType * output)
const;
66 const unsigned int lda,
68 const unsigned int ldb,
71 const unsigned int ldc)
const;
79 const std::complex<float> *alpha,
80 const std::complex<float> *A,
81 const unsigned int lda,
82 const std::complex<float> *B,
83 const unsigned int ldb,
84 const std::complex<float> *beta,
85 std::complex<float> * C,
86 const unsigned int ldc)
const;
97 const unsigned int lda,
99 const unsigned int ldb,
102 const unsigned int ldc)
const;
109 const unsigned int m,
110 const unsigned int n,
111 const unsigned int k,
112 const std::complex<double> *alpha,
113 const std::complex<double> *A,
114 const unsigned int lda,
115 const std::complex<double> *B,
116 const unsigned int ldb,
117 const std::complex<double> *beta,
118 std::complex<double> * C,
119 const unsigned int ldc)
const;
123 const unsigned int m,
124 const unsigned int n,
125 const double * alpha,
127 const unsigned int lda,
129 const unsigned int incx,
132 const unsigned int incy)
const;
136 const unsigned int m,
137 const unsigned int n,
140 const unsigned int lda,
142 const unsigned int incx,
145 const unsigned int incy)
const;
149 const unsigned int m,
150 const unsigned int n,
151 const std::complex<double> *alpha,
152 const std::complex<double> *A,
153 const unsigned int lda,
154 const std::complex<double> *x,
155 const unsigned int incx,
156 const std::complex<double> *beta,
157 std::complex<double> * y,
158 const unsigned int incy)
const;
162 const unsigned int m,
163 const unsigned int n,
164 const std::complex<float> *alpha,
165 const std::complex<float> *A,
166 const unsigned int lda,
167 const std::complex<float> *x,
168 const unsigned int incx,
169 const std::complex<float> *beta,
170 std::complex<float> * y,
171 const unsigned int incy)
const;
174 template <
typename ValueType1,
typename ValueType2>
177 const ValueType2 alpha,
189 template <
typename ValueType>
193 const ValueType * input1,
194 const ValueType * input2,
204 const unsigned int incx,
205 const MPI_Comm & mpi_communicator,
206 double * result)
const;
212 const std::complex<double> *x,
213 const unsigned int incx,
214 const MPI_Comm & mpi_communicator,
215 double * result)
const;
220 const unsigned int INCX,
222 const unsigned int INCY,
223 double * result)
const;
228 const unsigned int INCX,
230 const unsigned int INCY,
231 const MPI_Comm & mpi_communicator,
232 double * result)
const;
237 const std::complex<double> *X,
238 const unsigned int INCX,
239 const std::complex<double> *Y,
240 const unsigned int INCY,
241 std::complex<double> * result)
const;
246 const std::complex<double> *X,
247 const unsigned int INCX,
248 const std::complex<double> *Y,
249 const unsigned int INCY,
250 const MPI_Comm & mpi_communicator,
251 std::complex<double> * result)
const;
255 template <
typename ValueType>
258 const unsigned int numContiguousBlocks,
261 const ValueType * onesVec,
262 ValueType * tempVector,
263 ValueType * tempResults,
264 ValueType * result)
const;
267 template <
typename ValueType>
270 const unsigned int numContiguousBlocks,
273 const ValueType * onesVec,
274 ValueType * tempVector,
275 ValueType * tempResults,
276 const MPI_Comm & mpi_communicator,
277 ValueType * result)
const;
283 const double * alpha,
285 const unsigned int incx,
287 const unsigned int incy)
const;
292 const std::complex<double> *alpha,
293 const std::complex<double> *x,
294 const unsigned int incx,
295 std::complex<double> * y,
296 const unsigned int incy)
const;
303 const unsigned int incx,
305 const unsigned int incy)
const;
310 const std::complex<float> *alpha,
311 const std::complex<float> *x,
312 const unsigned int incx,
313 std::complex<float> * y,
314 const unsigned int incy)
const;
320 const unsigned int incx,
322 const unsigned int incy)
const;
327 const std::complex<double> *x,
328 const unsigned int incx,
329 std::complex<double> * y,
330 const unsigned int incy)
const;
336 const unsigned int incx,
338 const unsigned int incy)
const;
343 const std::complex<float> *x,
344 const unsigned int incx,
345 std::complex<float> * y,
346 const unsigned int incy)
const;
351 const unsigned int N,
352 const double * alpha,
354 const unsigned int LDA,
356 const unsigned int INCX,
359 const unsigned int INCY)
const;
364 const unsigned int m,
365 const unsigned int n,
366 const unsigned int k,
367 const double * alpha,
369 const unsigned int lda,
371 const unsigned int ldb,
374 const unsigned int ldc,
375 const int batchCount)
const;
380 const unsigned int m,
381 const unsigned int n,
382 const unsigned int k,
383 const std::complex<double> *alpha,
384 const std::complex<double> *A[],
385 const unsigned int lda,
386 const std::complex<double> *B[],
387 const unsigned int ldb,
388 const std::complex<double> *beta,
389 std::complex<double> * C[],
390 const unsigned int ldc,
391 const int batchCount)
const;
397 const unsigned int m,
398 const unsigned int n,
399 const unsigned int k,
402 const unsigned int lda,
404 const unsigned int ldb,
407 const unsigned int ldc,
408 const int batchCount)
const;
413 const unsigned int m,
414 const unsigned int n,
415 const unsigned int k,
416 const std::complex<float> *alpha,
417 const std::complex<float> *A[],
418 const unsigned int lda,
419 const std::complex<float> *B[],
420 const unsigned int ldb,
421 const std::complex<float> *beta,
422 std::complex<float> * C[],
423 const unsigned int ldc,
424 const int batchCount)
const;
430 const unsigned int m,
431 const unsigned int n,
432 const unsigned int k,
433 const double * alpha,
435 const unsigned int lda,
436 long long int strideA,
438 const unsigned int ldb,
439 long long int strideB,
442 const unsigned int ldc,
443 long long int strideC,
444 const int batchCount)
const;
449 const unsigned int m,
450 const unsigned int n,
451 const unsigned int k,
452 const std::complex<double> *alpha,
453 const std::complex<double> *A,
454 const unsigned int lda,
455 long long int strideA,
456 const std::complex<double> *B,
457 const unsigned int ldb,
458 long long int strideB,
459 const std::complex<double> *beta,
460 std::complex<double> * C,
461 const unsigned int ldc,
462 long long int strideC,
463 const int batchCount)
const;
468 const unsigned int m,
469 const unsigned int n,
470 const unsigned int k,
471 const std::complex<float> *alpha,
472 const std::complex<float> *A,
473 const unsigned int lda,
474 long long int strideA,
475 const std::complex<float> *B,
476 const unsigned int ldb,
477 long long int strideB,
478 const std::complex<float> *beta,
479 std::complex<float> * C,
480 const unsigned int ldc,
481 long long int strideC,
482 const int batchCount)
const;
487 const unsigned int m,
488 const unsigned int n,
489 const unsigned int k,
492 const unsigned int lda,
493 long long int strideA,
495 const unsigned int ldb,
496 long long int strideB,
499 const unsigned int ldc,
500 long long int strideC,
501 const int batchCount)
const;
503 template <
typename ValueTypeComplex,
typename ValueTypeReal>
506 const ValueTypeComplex *complexArr,
507 ValueTypeReal * realArr,
508 ValueTypeReal * imagArr);
511 template <
typename ValueTypeComplex,
typename ValueTypeReal>
514 const ValueTypeReal * realArr,
515 const ValueTypeReal * imagArr,
516 ValueTypeComplex * complexArr);
518 template <
typename ValueType1,
typename ValueType2>
521 const ValueType1 * valueType1Arr,
522 ValueType2 * valueType2Arr);
525 template <
typename ValueType1,
typename ValueType2>
530 const ValueType1 * copyFromVec,
531 ValueType2 * copyToVecBlock,
535 template <
typename ValueType1,
typename ValueType2>
541 const ValueType1 * copyFromVec,
542 ValueType2 * copyToVecBlock,
545 template <
typename ValueType1,
typename ValueType2>
550 const ValueType1 * copyFromVecBlock,
551 ValueType2 * copyToVec,
554 template <
typename ValueType1,
typename ValueType2>
560 const ValueType1 * copyFromVec,
561 ValueType2 * copyToVec)
const;
564 template <
typename ValueType1,
typename ValueType2>
572 const ValueType1 * copyFromVec,
573 ValueType2 * copyToVec);
576 template <
typename ValueType1,
typename ValueType2>
582 const ValueType1 * copyFromVec,
583 ValueType2 * copyToVec);
585 template <
typename ValueType1,
typename ValueType2>
589 const ValueType1 * addFromVec,
590 const ValueType2 * scalingVector,
592 ValueType1 * addToVec)
const;
595 template <
typename ValueType1,
typename ValueType2>
599 const ValueType1 * addFromVec,
600 const ValueType2 * scalingVector,
603 ValueType1 * addToVec)
const;
604 template <
typename ValueType1,
typename ValueType2>
607 const ValueType2 alpha,
608 const ValueType1 * x,
609 const ValueType2 beta,
610 ValueType1 * y)
const;
611 template <
typename ValueType0,
618 const unsigned int n,
619 const ValueType0 alpha,
620 const ValueType1 * A,
621 const ValueType2 * B,
622 const ValueType3 * D,
623 ValueType4 * C)
const;
625 template <
typename ValueType>
629 const ValueType * addFromVec,
630 ValueType * addToVec,
632 *addToVecStartingContiguousBlockIds)
const;
634 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
639 const ValueType1 * s,
640 const ValueType2 * addFromVec,
641 ValueType3 * addToVec,
643 *addToVecStartingContiguousBlockIds)
const;
644 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
649 const ValueType2 * addFromVec,
650 ValueType3 * addToVec,
652 *addToVecStartingContiguousBlockIds)
const;
654 template <
typename ValueType1,
typename ValueType2>
659 const ValueType1 * s,
662 template <
typename ValueType1,
typename ValueType2>
668 const ValueType1 * s,
669 const ValueType2 * copyFromVec,
670 ValueType2 * copyToVecBlock,
679 template <
typename ValueType>
683 const ValueType beta,
686 template <
typename ValueType>
690 const ValueType * beta,
693 template <
typename ValueType>
699 const ValueType * beta,
702 template <
typename ValueType>
708 const ValueType * alpha,
710 const ValueType * beta,
713 template <
typename ValueType1,
typename ValueType2>
722#if defined(DFTFE_WITH_DEVICE)
724 enum class tensorOpDataType
738 template <
typename ValueType1,
typename ValueType2>
740 copyValueType1ArrToValueType2ArrDeviceCall(
742 const ValueType1 * valueType1Arr,
743 ValueType2 * valueType2Arr,
746 template <
typename ValueType>
748 hadamardProduct(
const unsigned int m,
751 ValueType * output)
const;
753 template <
typename ValueType>
755 hadamardProductWithConj(
const unsigned int m,
758 ValueType * output)
const;
762 xgemm(
const char transA,
764 const unsigned int m,
765 const unsigned int n,
766 const unsigned int k,
769 const unsigned int lda,
771 const unsigned int ldb,
774 const unsigned int ldc)
const;
777 xgemm(
const char transA,
779 const unsigned int m,
780 const unsigned int n,
781 const unsigned int k,
782 const std::complex<float> *alpha,
783 const std::complex<float> *A,
784 const unsigned int lda,
785 const std::complex<float> *B,
786 const unsigned int ldb,
787 const std::complex<float> *beta,
788 std::complex<float> * C,
789 const unsigned int ldc)
const;
793 xgemm(
const char transA,
795 const unsigned int m,
796 const unsigned int n,
797 const unsigned int k,
798 const double * alpha,
800 const unsigned int lda,
802 const unsigned int ldb,
805 const unsigned int ldc)
const;
810 xgemm(
const char transA,
812 const unsigned int m,
813 const unsigned int n,
814 const unsigned int k,
815 const std::complex<double> *alpha,
816 const std::complex<double> *A,
817 const unsigned int lda,
818 const std::complex<double> *B,
819 const unsigned int ldb,
820 const std::complex<double> *beta,
821 std::complex<double> * C,
822 const unsigned int ldc)
const;
826 xgemv(
const char transA,
827 const unsigned int m,
828 const unsigned int n,
829 const double * alpha,
831 const unsigned int lda,
833 const unsigned int incx,
836 const unsigned int incy)
const;
839 xgemv(
const char transA,
840 const unsigned int m,
841 const unsigned int n,
844 const unsigned int lda,
846 const unsigned int incx,
849 const unsigned int incy)
const;
852 xgemv(
const char transA,
853 const unsigned int m,
854 const unsigned int n,
855 const std::complex<double> *alpha,
856 const std::complex<double> *A,
857 const unsigned int lda,
858 const std::complex<double> *x,
859 const unsigned int incx,
860 const std::complex<double> *beta,
861 std::complex<double> * y,
862 const unsigned int incy)
const;
865 xgemv(
const char transA,
866 const unsigned int m,
867 const unsigned int n,
868 const std::complex<float> *alpha,
869 const std::complex<float> *A,
870 const unsigned int lda,
871 const std::complex<float> *x,
872 const unsigned int incx,
873 const std::complex<float> *beta,
874 std::complex<float> * y,
875 const unsigned int incy)
const;
877 template <
typename ValueType>
881 const ValueType * input1,
882 const ValueType * input2,
887 template <
typename ValueType1,
typename ValueType2>
889 xscal(ValueType1 * x,
890 const ValueType2 alpha,
897 xnrm2(
const unsigned int n,
899 const unsigned int incx,
900 const MPI_Comm & mpi_communicator,
901 double * result)
const;
906 xnrm2(
const unsigned int n,
907 const std::complex<double> *x,
908 const unsigned int incx,
909 const MPI_Comm & mpi_communicator,
910 double * result)
const;
914 xdot(
const unsigned int N,
916 const unsigned int INCX,
918 const unsigned int INCY,
919 double * result)
const;
924 xdot(
const unsigned int N,
926 const unsigned int INCX,
928 const unsigned int INCY,
929 const MPI_Comm & mpi_communicator,
930 double * result)
const;
934 xdot(
const unsigned int N,
935 const std::complex<double> *X,
936 const unsigned int INCX,
937 const std::complex<double> *Y,
938 const unsigned int INCY,
939 std::complex<double> * result)
const;
943 xdot(
const unsigned int N,
944 const std::complex<double> *X,
945 const unsigned int INCX,
946 const std::complex<double> *Y,
947 const unsigned int INCY,
948 const MPI_Comm & mpi_communicator,
949 std::complex<double> * result)
const;
952 template <
typename ValueType>
954 MultiVectorXDot(
const unsigned int contiguousBlockSize,
955 const unsigned int numContiguousBlocks,
958 const ValueType * onesVec,
959 ValueType * tempVector,
960 ValueType * tempResults,
961 ValueType * result)
const;
963 template <
typename ValueType>
965 MultiVectorXDot(
const unsigned int contiguousBlockSize,
966 const unsigned int numContiguousBlocks,
969 const ValueType * onesVec,
970 ValueType * tempVector,
971 ValueType * tempResults,
972 const MPI_Comm & mpi_communicator,
973 ValueType * result)
const;
977 xaxpy(
const unsigned int n,
978 const double * alpha,
980 const unsigned int incx,
982 const unsigned int incy)
const;
986 xaxpy(
const unsigned int n,
987 const std::complex<double> *alpha,
988 const std::complex<double> *x,
989 const unsigned int incx,
990 std::complex<double> * y,
991 const unsigned int incy)
const;
995 xcopy(
const unsigned int n,
997 const unsigned int incx,
999 const unsigned int incy)
const;
1003 xcopy(
const unsigned int n,
1004 const std::complex<double> *x,
1005 const unsigned int incx,
1006 std::complex<double> * y,
1007 const unsigned int incy)
const;
1011 xcopy(
const unsigned int n,
1013 const unsigned int incx,
1015 const unsigned int incy)
const;
1019 xcopy(
const unsigned int n,
1020 const std::complex<float> *x,
1021 const unsigned int incx,
1022 std::complex<float> * y,
1023 const unsigned int incy)
const;
1027 xsymv(
const char UPLO,
1028 const unsigned int N,
1029 const double * alpha,
1031 const unsigned int LDA,
1033 const unsigned int INCX,
1034 const double * beta,
1036 const unsigned int INCY)
const;
1039 xgemmBatched(
const char transA,
1041 const unsigned int m,
1042 const unsigned int n,
1043 const unsigned int k,
1044 const double * alpha,
1046 const unsigned int lda,
1048 const unsigned int ldb,
1049 const double * beta,
1051 const unsigned int ldc,
1052 const int batchCount)
const;
1055 xgemmBatched(
const char transA,
1057 const unsigned int m,
1058 const unsigned int n,
1059 const unsigned int k,
1060 const std::complex<double> *alpha,
1061 const std::complex<double> *A[],
1062 const unsigned int lda,
1063 const std::complex<double> *B[],
1064 const unsigned int ldb,
1065 const std::complex<double> *beta,
1066 std::complex<double> * C[],
1067 const unsigned int ldc,
1068 const int batchCount)
const;
1071 xgemmBatched(
const char transA,
1073 const unsigned int m,
1074 const unsigned int n,
1075 const unsigned int k,
1076 const float * alpha,
1078 const unsigned int lda,
1080 const unsigned int ldb,
1083 const unsigned int ldc,
1084 const int batchCount)
const;
1087 xgemmBatched(
const char transA,
1089 const unsigned int m,
1090 const unsigned int n,
1091 const unsigned int k,
1092 const std::complex<float> *alpha,
1093 const std::complex<float> *A[],
1094 const unsigned int lda,
1095 const std::complex<float> *B[],
1096 const unsigned int ldb,
1097 const std::complex<float> *beta,
1098 std::complex<float> * C[],
1099 const unsigned int ldc,
1100 const int batchCount)
const;
1103 xgemmStridedBatched(
const char transA,
1105 const unsigned int m,
1106 const unsigned int n,
1107 const unsigned int k,
1108 const double * alpha,
1110 const unsigned int lda,
1111 long long int strideA,
1113 const unsigned int ldb,
1114 long long int strideB,
1115 const double * beta,
1117 const unsigned int ldc,
1118 long long int strideC,
1119 const int batchCount)
const;
1122 xgemmStridedBatched(
const char transA,
1124 const unsigned int m,
1125 const unsigned int n,
1126 const unsigned int k,
1127 const std::complex<double> *alpha,
1128 const std::complex<double> *A,
1129 const unsigned int lda,
1130 long long int strideA,
1131 const std::complex<double> *B,
1132 const unsigned int ldb,
1133 long long int strideB,
1134 const std::complex<double> *beta,
1135 std::complex<double> * C,
1136 const unsigned int ldc,
1137 long long int strideC,
1138 const int batchCount)
const;
1141 xgemmStridedBatched(
const char transA,
1143 const unsigned int m,
1144 const unsigned int n,
1145 const unsigned int k,
1146 const std::complex<float> *alpha,
1147 const std::complex<float> *A,
1148 const unsigned int lda,
1149 long long int strideA,
1150 const std::complex<float> *B,
1151 const unsigned int ldb,
1152 long long int strideB,
1153 const std::complex<float> *beta,
1154 std::complex<float> * C,
1155 const unsigned int ldc,
1156 long long int strideC,
1157 const int batchCount)
const;
1160 xgemmStridedBatched(
const char transA,
1162 const unsigned int m,
1163 const unsigned int n,
1164 const unsigned int k,
1165 const float * alpha,
1167 const unsigned int lda,
1168 long long int strideA,
1170 const unsigned int ldb,
1171 long long int strideB,
1174 const unsigned int ldc,
1175 long long int strideC,
1176 const int batchCount)
const;
1178 template <
typename ValueTypeComplex,
typename ValueTypeReal>
1181 const ValueTypeComplex *complexArr,
1182 ValueTypeReal * realArr,
1183 ValueTypeReal * imagArr);
1186 template <
typename ValueTypeComplex,
typename ValueTypeReal>
1189 const ValueTypeReal * realArr,
1190 const ValueTypeReal * imagArr,
1191 ValueTypeComplex * complexArr);
1193 template <
typename ValueType1,
typename ValueType2>
1196 const ValueType1 * valueType1Arr,
1197 ValueType2 * valueType2Arr);
1200 template <
typename ValueType1,
typename ValueType2>
1205 const ValueType1 * copyFromVec,
1206 ValueType2 * copyToVecBlock,
1209 template <
typename ValueType1,
typename ValueType2>
1215 const ValueType1 * copyFromVec,
1216 ValueType2 * copyToVecBlock,
1220 template <
typename ValueType1,
typename ValueType2>
1222 stridedCopyFromBlock(
1225 const ValueType1 * copyFromVecBlock,
1226 ValueType2 * copyToVec,
1229 template <
typename ValueType1,
typename ValueType2>
1235 const ValueType1 * copyFromVec,
1236 ValueType2 * copyToVec)
const;
1239 template <
typename ValueType1,
typename ValueType2>
1247 const ValueType1 * copyFromVec,
1248 ValueType2 * copyToVec);
1251 template <
typename ValueType1,
typename ValueType2>
1257 const ValueType1 * copyFromVec,
1258 ValueType2 * copyToVec);
1259 template <
typename ValueType1,
typename ValueType2>
1261 axpby(
const unsigned int n,
1262 const ValueType2 alpha,
1263 const ValueType1 * x,
1264 const ValueType2 beta,
1265 ValueType1 * y)
const;
1267 template <
typename ValueType1,
typename ValueType2>
1271 const ValueType1 * addFromVec,
1272 const ValueType2 * scalingVector,
1274 ValueType1 * addToVec)
const;
1275 template <
typename ValueType1,
typename ValueType2>
1279 const ValueType1 * addFromVec,
1280 const ValueType2 * scalingVector,
1283 ValueType1 * addToVec)
const;
1285 template <
typename ValueType0,
1286 typename ValueType1,
1287 typename ValueType2,
1288 typename ValueType3,
1289 typename ValueType4>
1291 ApaBD(
const unsigned int m,
1292 const unsigned int n,
1293 const ValueType0 alpha,
1294 const ValueType1 * A,
1295 const ValueType2 * B,
1296 const ValueType3 * D,
1297 ValueType4 * C)
const;
1300 template <
typename ValueType>
1304 const ValueType * addFromVec,
1305 ValueType * addToVec,
1307 *addToVecStartingContiguousBlockIds)
const;
1309 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
1314 const ValueType1 * s,
1315 const ValueType2 * addFromVec,
1316 ValueType3 * addToVec,
1318 *addToVecStartingContiguousBlockIds)
const;
1319 template <
typename ValueType1,
typename ValueType2,
typename ValueType3>
1324 const ValueType2 * addFromVec,
1325 ValueType3 * addToVec,
1327 *addToVecStartingContiguousBlockIds)
const;
1329 template <
typename ValueType1,
typename ValueType2>
1334 const ValueType1 * s,
1336 template <
typename ValueType1,
typename ValueType2>
1338 stridedBlockScaleCopy(
1342 const ValueType1 * s,
1343 const ValueType2 * copyFromVec,
1344 ValueType2 * copyToVecBlock,
1353 template <
typename ValueType>
1357 const ValueType beta,
1360 template <
typename ValueType>
1364 const ValueType * beta,
1367 template <
typename ValueType>
1369 stridedBlockScaleAndAddColumnWise(
1372 const ValueType * x,
1373 const ValueType * beta,
1376 template <
typename ValueType>
1378 stridedBlockScaleAndAddTwoVecColumnWise(
1381 const ValueType * x,
1382 const ValueType * alpha,
1383 const ValueType * y,
1384 const ValueType * beta,
1387 template <
typename ValueType1,
typename ValueType2>
1395 getDeviceBlasHandle();
1398 template <
typename ValueType1,
typename ValueType2>
1400 copyBlockDiagonalValueType1OffDiagonalValueType2FromValueType1Arr(
1404 const ValueType1 * valueType1SrcArray,
1405 ValueType1 * valueType1DstArray,
1406 ValueType2 * valueType2DstArray);
1408# ifdef DFTFE_WITH_DEVICE_LANG_CUDA
1413 setTensorOpDataType(tensorOpDataType opType)
1422# ifdef DFTFE_WITH_DEVICE_AMD
1430 tensorOpDataType d_opType;
void axpby(const unsigned int n, const ValueType2 alpha, const ValueType1 *x, const ValueType2 beta, ValueType1 *y) const
void xgemv(const char transA, const unsigned int m, const unsigned int n, const float *alpha, const float *A, const unsigned int lda, const float *x, const unsigned int incx, const float *beta, float *y, const unsigned int incy) const
void xgemv(const char transA, const unsigned int m, const unsigned int n, const double *alpha, const double *A, const unsigned int lda, const double *x, const unsigned int incx, const double *beta, double *y, const unsigned int incy) const
void xdot(const unsigned int N, const std::complex< double > *X, const unsigned int INCX, const std::complex< double > *Y, const unsigned int INCY, const MPI_Comm &mpi_communicator, std::complex< double > *result) const
void xgemm(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const double *alpha, const double *A, const unsigned int lda, const double *B, const unsigned int ldb, const double *beta, double *C, const unsigned int ldc) const
void xaxpy(const unsigned int n, const std::complex< double > *alpha, const std::complex< double > *x, const unsigned int incx, std::complex< double > *y, const unsigned int incy) const
void xgemv(const char transA, const unsigned int m, const unsigned int n, const std::complex< double > *alpha, const std::complex< double > *A, const unsigned int lda, const std::complex< double > *x, const unsigned int incx, const std::complex< double > *beta, std::complex< double > *y, const unsigned int incy) const
void xnrm2(const unsigned int n, const std::complex< double > *x, const unsigned int incx, const MPI_Comm &mpi_communicator, double *result) const
void xgemmStridedBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const double *alpha, const double *A, const unsigned int lda, long long int strideA, const double *B, const unsigned int ldb, long long int strideB, const double *beta, double *C, const unsigned int ldc, long long int strideC, const int batchCount) const
void stridedCopyToBlock(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds)
void xcopy(const unsigned int n, const double *x, const unsigned int incx, double *y, const unsigned int incy) const
void xdot(const unsigned int N, const double *X, const unsigned int INCX, const double *Y, const unsigned int INCY, const MPI_Comm &mpi_communicator, double *result) const
void hadamardProduct(const unsigned int m, const ValueType *X, const ValueType *Y, ValueType *output) const
void xgemmStridedBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< double > *alpha, const std::complex< double > *A, const unsigned int lda, long long int strideA, const std::complex< double > *B, const unsigned int ldb, long long int strideB, const std::complex< double > *beta, std::complex< double > *C, const unsigned int ldc, long long int strideC, const int batchCount) const
void xgemm(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< float > *alpha, const std::complex< float > *A, const unsigned int lda, const std::complex< float > *B, const unsigned int ldb, const std::complex< float > *beta, std::complex< float > *C, const unsigned int ldc) const
void stridedBlockScale(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 a, const ValueType1 *s, ValueType2 *x)
void xgemmStridedBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const float *alpha, const float *A, const unsigned int lda, long long int strideA, const float *B, const unsigned int ldb, long long int strideB, const float *beta, float *C, const unsigned int ldc, long long int strideC, const int batchCount) const
void MultiVectorXDot(const unsigned int contiguousBlockSize, const unsigned int numContiguousBlocks, const ValueType *X, const ValueType *Y, const ValueType *onesVec, ValueType *tempVector, ValueType *tempResults, ValueType *result) const
void xdot(const unsigned int N, const std::complex< double > *X, const unsigned int INCX, const std::complex< double > *Y, const unsigned int INCY, std::complex< double > *result) const
void add(double *y, const double *x, const double alpha, const dftfe::size_type size)
void ApaBD(const unsigned int m, const unsigned int n, const ValueType0 alpha, const ValueType1 *A, const ValueType2 *B, const ValueType3 *D, ValueType4 *C) const
void axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 a, const ValueType1 *s, const ValueType2 *addFromVec, ValueType3 *addToVec, const dftfe::global_size_type *addToVecStartingContiguousBlockIds) const
void xcopy(const unsigned int n, const std::complex< double > *x, const unsigned int incx, std::complex< double > *y, const unsigned int incy) const
void xgemmBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< float > *alpha, const std::complex< float > *A[], const unsigned int lda, const std::complex< float > *B[], const unsigned int ldb, const std::complex< float > *beta, std::complex< float > *C[], const unsigned int ldc, const int batchCount) const
void stridedCopyToBlock(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const dftfe::size_type startingVecId, const ValueType1 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds)
void stridedCopyToBlockConstantStride(const dftfe::size_type blockSizeTo, const dftfe::size_type blockSizeFrom, const dftfe::size_type numBlocks, const dftfe::size_type startingId, const ValueType1 *copyFromVec, ValueType2 *copyToVec) const
void xgemmBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const double *alpha, const double *A[], const unsigned int lda, const double *B[], const unsigned int ldb, const double *beta, double *C[], const unsigned int ldc, const int batchCount) const
void xgemmStridedBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< float > *alpha, const std::complex< float > *A, const unsigned int lda, long long int strideA, const std::complex< float > *B, const unsigned int ldb, long long int strideB, const std::complex< float > *beta, std::complex< float > *C, const unsigned int ldc, long long int strideC, const int batchCount) const
void xcopy(const unsigned int n, const std::complex< float > *x, const unsigned int incx, std::complex< float > *y, const unsigned int incy) const
void axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 a, const ValueType2 *addFromVec, ValueType3 *addToVec, const dftfe::global_size_type *addToVecStartingContiguousBlockIds) const
void hadamardProductWithConj(const unsigned int m, const ValueType *X, const ValueType *Y, ValueType *output) const
void xaxpy(const unsigned int n, const float *alpha, const float *x, const unsigned int incx, float *y, const unsigned int incy) const
void stridedCopyFromBlockConstantStride(const dftfe::size_type blockSizeTo, const dftfe::size_type blockSizeFrom, const dftfe::size_type numBlocks, const dftfe::size_type startingId, const ValueType1 *copyFromVec, ValueType2 *copyToVec)
void xdot(const unsigned int N, const double *X, const unsigned int INCX, const double *Y, const unsigned int INCY, double *result) const
void xaxpy(const unsigned int n, const double *alpha, const double *x, const unsigned int incx, double *y, const unsigned int incy) const
void xgemm(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const float *alpha, const float *A, const unsigned int lda, const float *B, const unsigned int ldb, const float *beta, float *C, const unsigned int ldc) const
void axpyStridedBlockAtomicAdd(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType *addFromVec, ValueType *addToVec, const dftfe::global_size_type *addToVecStartingContiguousBlockIds) const
void copyValueType1ArrToValueType2Arr(const dftfe::size_type size, const ValueType1 *valueType1Arr, ValueType2 *valueType2Arr)
void stridedBlockScaleAndAddTwoVecColumnWise(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType *x, const ValueType *alpha, const ValueType *y, const ValueType *beta, ValueType *z)
void xsymv(const char UPLO, const unsigned int N, const double *alpha, const double *A, const unsigned int LDA, const double *X, const unsigned int INCX, const double *beta, double *C, const unsigned int INCY) const
void copyComplexArrToRealArrs(const dftfe::size_type size, const ValueTypeComplex *complexArr, ValueTypeReal *realArr, ValueTypeReal *imagArr)
void stridedBlockScaleCopy(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 a, const ValueType1 *s, const ValueType2 *copyFromVec, ValueType2 *copyToVecBlock, const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds)
void stridedBlockScaleColumnWise(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType *beta, ValueType *x)
void stridedBlockAxpBy(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 *addFromVec, const ValueType2 *scalingVector, const ValueType2 a, const ValueType2 b, ValueType1 *addToVec) const
void xgemmBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< double > *alpha, const std::complex< double > *A[], const unsigned int lda, const std::complex< double > *B[], const unsigned int ldb, const std::complex< double > *beta, std::complex< double > *C[], const unsigned int ldc, const int batchCount) const
void xnrm2(const unsigned int n, const double *x, const unsigned int incx, const MPI_Comm &mpi_communicator, double *result) const
void addVecOverContinuousIndex(const dftfe::size_type numContiguousBlocks, const dftfe::size_type contiguousBlockSize, const ValueType *input1, const ValueType *input2, ValueType *output)
void stridedCopyFromBlock(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 *copyFromVecBlock, ValueType2 *copyToVec, const dftfe::global_size_type *copyFromVecStartingContiguousBlockIds)
void xgemm(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const std::complex< double > *alpha, const std::complex< double > *A, const unsigned int lda, const std::complex< double > *B, const unsigned int ldb, const std::complex< double > *beta, std::complex< double > *C, const unsigned int ldc) const
void rightDiagonalScale(const dftfe::size_type numberofVectors, const dftfe::size_type sizeOfVector, ValueType1 *X, ValueType2 *D)
void xaxpy(const unsigned int n, const std::complex< float > *alpha, const std::complex< float > *x, const unsigned int incx, std::complex< float > *y, const unsigned int incy) const
void stridedBlockScaleAndAddColumnWise(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType *x, const ValueType *beta, ValueType *y)
void xgemv(const char transA, const unsigned int m, const unsigned int n, const std::complex< float > *alpha, const std::complex< float > *A, const unsigned int lda, const std::complex< float > *x, const unsigned int incx, const std::complex< float > *beta, std::complex< float > *y, const unsigned int incy) const
void xgemmBatched(const char transA, const char transB, const unsigned int m, const unsigned int n, const unsigned int k, const float *alpha, const float *A[], const unsigned int lda, const float *B[], const unsigned int ldb, const float *beta, float *C[], const unsigned int ldc, const int batchCount) const
void stridedCopyConstantStride(const dftfe::size_type blockSize, const dftfe::size_type strideTo, const dftfe::size_type strideFrom, const dftfe::size_type numBlocks, const dftfe::size_type startingToId, const dftfe::size_type startingFromId, const ValueType1 *copyFromVec, ValueType2 *copyToVec)
void sadd(ValueType *y, ValueType *x, const ValueType beta, const dftfe::size_type size)
void xcopy(const unsigned int n, const float *x, const unsigned int incx, float *y, const unsigned int incy) const
void stridedBlockAxpy(const dftfe::size_type contiguousBlockSize, const dftfe::size_type numContiguousBlocks, const ValueType1 *addFromVec, const ValueType2 *scalingVector, const ValueType2 a, ValueType1 *addToVec) const
void xscal(ValueType1 *x, const ValueType2 alpha, const dftfe::size_type n) const
void MultiVectorXDot(const unsigned int contiguousBlockSize, const unsigned int numContiguousBlocks, const ValueType *X, const ValueType *Y, const ValueType *onesVec, ValueType *tempVector, ValueType *tempResults, const MPI_Comm &mpi_communicator, ValueType *result) const
void copyRealArrsToComplexArr(const dftfe::size_type size, const ValueTypeReal *realArr, const ValueTypeReal *imagArr, ValueTypeComplex *complexArr)
Definition BLASWrapper.h:35
Definition BLASWrapper.h:33
cudaStream_t deviceStream_t
Definition DeviceTypeConfig.cu.h:27
cublasStatus_t deviceBlasStatus_t
Definition DeviceTypeConfig.cu.h:38
@ HOST
Definition MemorySpaceType.h:34
@ DEVICE
Definition MemorySpaceType.h:36
cublasMath_t deviceBlasMath_t
Definition DeviceTypeConfig.cu.h:39
cublasHandle_t deviceBlasHandle_t
Definition DeviceTypeConfig.cu.h:36
Definition pseudoPotentialToDftfeConverter.cc:34
unsigned int size_type
Definition TypeConfig.h:6
unsigned long int global_size_type
Definition TypeConfig.h:7
@ LDA
Definition ExcSSDFunctionalBaseClass.h:29