17#if defined(DFTFE_WITH_DEVICE)
18# ifndef linearAlgebraOperationsDevice_h
19# define linearAlgebraOperationsDevice_h
35 dsyevd_(
const char *jobz,
37 const unsigned int *n,
39 const unsigned int *lda,
42 const unsigned int *lwork,
44 const unsigned int *liwork,
48 zheevd_(
const char *jobz,
50 const unsigned int *n,
51 std::complex<double> *A,
52 const unsigned int *lda,
54 std::complex<double> *work,
55 const unsigned int *lwork,
57 const unsigned int *lrwork,
59 const unsigned int *liwork,
83 chebyshevFilterOverlapComputeCommunication(
84 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
97 template <
typename T1,
typename T2>
99 reformulatedChebyshevFilterOverlapComputeCommunication(
101 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
103 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
104 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
106 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
108 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
110 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
112 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
114 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
116 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
118 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
120 std::vector<double> eigenvalues,
125 const bool approxOverlapMatrix);
132 fillParallelOverlapMatScalapack(
133 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
135 distributedDeviceVec<dataTypes::number> &XBlock,
136 distributedDeviceVec<dataTypes::number> &OXBlock,
140 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
142 const MPI_Comm &mpiCommDomain,
143 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
144 const MPI_Comm &interBandGroupComm,
145 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
146 dftfe::ScaLAPACKMatrix<dataTypes::number> &overlapMatPar,
147 const dftParameters &dftParams);
156 fillParallelOverlapMatScalapackAsyncComputeCommun(
157 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
159 distributedDeviceVec<dataTypes::number> &XBlock,
160 distributedDeviceVec<dataTypes::number> &OXBlock,
164 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
166 const MPI_Comm &mpiCommDomain,
167 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
168 const MPI_Comm &interBandGroupComm,
169 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
170 dftfe::ScaLAPACKMatrix<dataTypes::number> &overlapMatPar,
171 const dftParameters &dftParams);
180 fillParallelOverlapMatMixedPrecScalapackAsyncComputeCommun(
181 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
183 distributedDeviceVec<dataTypes::number> &XBlock,
184 distributedDeviceVec<dataTypes::number> &OXBlock,
189 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
191 const MPI_Comm &mpiCommDomain,
192 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
193 const MPI_Comm &interBandGroupComm,
194 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
195 dftfe::ScaLAPACKMatrix<dataTypes::number> &overlapMatPar,
196 const dftParameters &dftParams);
203 fillParallelOverlapMatMixedPrecCommunScalapackAsyncComputeCommun(
204 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
206 distributedDeviceVec<dataTypes::number> &XBlock,
207 distributedDeviceVec<dataTypes::number> &OXBlock,
212 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
214 const MPI_Comm &mpiCommDomain,
215 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
216 const MPI_Comm &interBandGroupComm,
217 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
218 dftfe::ScaLAPACKMatrix<dataTypes::number> &overlapMatPar,
219 const dftParameters &dftParams);
226 fillParallelOverlapMatMixedPrecScalapack(
227 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
229 distributedDeviceVec<dataTypes::number> &XBlock,
230 distributedDeviceVec<dataTypes::number> &OXBlock,
235 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
237 const MPI_Comm &mpiCommDomain,
238 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
239 const MPI_Comm &interBandGroupComm,
240 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
241 dftfe::ScaLAPACKMatrix<dataTypes::number> &overlapMatPar,
242 const dftParameters &dftParams);
250 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
251 elpaScalaManager &elpaScala,
253 distributedDeviceVec<dataTypes::number> &Xb,
254 distributedDeviceVec<dataTypes::number> &HXb,
257 const MPI_Comm &mpiCommParent,
258 const MPI_Comm &mpiCommDomain,
259 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
260 const MPI_Comm &interBandGroupComm,
262 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
264 const dftParameters &dftParams,
265 const bool useMixedPrecOverall =
false);
268 subspaceRotationScalapack(
273 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
275 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
276 const MPI_Comm &mpiCommDomain,
277 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
278 const MPI_Comm &interBandGroupComm,
279 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
280 const dftParameters &dftParams,
281 const bool rotationMatTranspose =
false,
282 const bool isRotationMatLowerTria =
false);
287 subspaceRotationCGSMixedPrecScalapack(
292 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
294 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
295 const MPI_Comm &mpiCommDomain,
296 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
297 const MPI_Comm &interBandGroupComm,
298 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
299 const dftParameters &dftParams,
300 const bool rotationMatTranspose =
false);
304 subspaceRotationRRMixedPrecScalapack(
309 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
311 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
312 const MPI_Comm &mpiCommDomain,
313 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
314 const MPI_Comm &interBandGroupComm,
315 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
316 const dftParameters &dftParams,
317 const bool rotationMatTranspose =
false);
322 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
323 elpaScalaManager &elpaScala,
325 distributedDeviceVec<dataTypes::number> &Xb,
326 distributedDeviceVec<dataTypes::number> &HXb,
329 const MPI_Comm &mpiCommParent,
330 const MPI_Comm &mpiCommDomain,
331 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
332 const MPI_Comm &interBandGroupComm,
333 std::vector<double> &eigenValues,
335 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
337 const dftParameters &dftParams,
338 const bool useMixedPrecOverall =
false);
342 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
343 elpaScalaManager &elpaScala,
345 distributedDeviceVec<dataTypes::number> &Xb,
346 distributedDeviceVec<dataTypes::number> &HXb,
349 const MPI_Comm &mpiCommParent,
350 const MPI_Comm &mpiCommDomain,
351 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
352 const MPI_Comm &interBandGroupComm,
353 std::vector<double> &eigenValues,
355 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
357 const dftParameters &dftParams,
358 const bool useMixedPrecOverall =
false);
364 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
366 distributedDeviceVec<dataTypes::number> &Xb,
367 distributedDeviceVec<dataTypes::number> &HXb,
370 const MPI_Comm &mpiCommParent,
371 const MPI_Comm &mpiCommDomain,
372 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
373 const MPI_Comm &interBandGroupComm,
374 const std::vector<double> &eigenValues,
375 const double fermiEnergy,
376 std::vector<double> &densityMatDerFermiEnergy,
377 dftfe::elpaScalaManager &elpaScala,
379 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
381 const dftParameters &dftParams);
385 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
387 distributedDeviceVec<dataTypes::number> &Xb,
388 distributedDeviceVec<dataTypes::number> &HXb,
391 const std::vector<double> &eigenValues,
392 const MPI_Comm &mpiCommParent,
393 const MPI_Comm &mpiCommDomain,
394 const MPI_Comm &interBandGroupComm,
396 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
398 std::vector<double> &residualNorm,
399 const dftParameters &dftParams,
400 const bool useBandParal =
false);
403 XtHX(operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
405 distributedDeviceVec<dataTypes::number> &XBlock,
406 distributedDeviceVec<dataTypes::number> &HXBlock,
410 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
412 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
413 dftfe::ScaLAPACKMatrix<dataTypes::number> &projHamPar,
414 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
415 const MPI_Comm &mpiCommDomain,
416 const MPI_Comm &interBandGroupComm,
417 const dftParameters &dftParams,
418 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
421 XtHXMixedPrecOverlapComputeCommun(
422 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
424 distributedDeviceVec<dataTypes::number> &XBlock,
425 distributedDeviceVec<dataTypes::number> &HXBlock,
430 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
432 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
433 dftfe::ScaLAPACKMatrix<dataTypes::number> &projHamPar,
434 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
435 const MPI_Comm &mpiCommDomain,
436 const MPI_Comm &interBandGroupComm,
437 const dftParameters &dftParams,
438 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
441 XtHXOverlapComputeCommun(
442 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
444 distributedDeviceVec<dataTypes::number> &XBlock,
445 distributedDeviceVec<dataTypes::number> &HXBlock,
449 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
451 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
452 dftfe::ScaLAPACKMatrix<dataTypes::number> &projHamPar,
453 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
454 const MPI_Comm &mpiCommDomain,
455 const MPI_Comm &interBandGroupComm,
456 const dftParameters &dftParams,
457 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
460 XtHXMixedPrecCommunOverlapComputeCommun(
461 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
463 distributedDeviceVec<dataTypes::number> &XBlock,
464 distributedDeviceVec<dataTypes::number> &HXBlock,
469 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
471 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
472 dftfe::ScaLAPACKMatrix<dataTypes::number> &projHamPar,
473 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
474 const MPI_Comm &mpiCommDomain,
475 const MPI_Comm &interBandGroupComm,
476 const dftParameters &dftParams,
477 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
double number
Definition dftfeDataTypes.h:42
Definition linearAlgebraOperationsDeviceKernels.h:9
void rayleighRitzGEP(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, elpaScalaManager &elpaScala, T *X, const dftfe::uInt numberComponents, const dftfe::uInt numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, std::vector< double > &eigenValues, const bool useMixedPrec, const dftParameters &dftParams)
Compute Rayleigh-Ritz projection (serial version using LAPACK, parallel version using ScaLAPACK)
void computeEigenResidualNorm(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const std::vector< double > &eigenValues, const dftfe::uInt numberComponents, const dftfe::uInt numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, std::vector< double > &residualNorm, const dftParameters &dftParams)
Compute residual norm associated with the eigenvalue problem of the given operator.
void rayleighRitz(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, elpaScalaManager &elpaScala, T *X, const dftfe::uInt numberComponents, const dftfe::uInt numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, std::vector< double > &eigenValues, const dftParameters &dftParams, const bool doCommAfterBandParal=true)
Compute Rayleigh-Ritz projection (serial version using LAPACK, parallel version using ScaLAPACK)
dftfe::uInt pseudoGramSchmidtOrthogonalization(elpaScalaManager &elpaScala, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const dftfe::uInt numberComponents, const dftfe::uInt numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, const bool useMixedPrec, const dftParameters &dftParams)
Orthogonalize given subspace using Pseudo-Gram-Schmidt orthogonalization (serial version using LAPACK, parallel version using ScaLAPACK)
void XtHX(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dataTypes::number *X, const dftfe::uInt numberComponents, const dftfe::uInt numberLocalDofs, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, const dftParameters &dftParams, std::vector< dataTypes::number > &ProjHam)
Compute projection of the operator into a subspace spanned by a given orthogonal basis HProjConj=X^{T}*HConj*XConj
void densityMatrixEigenBasisFirstOrderResponse(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const dftfe::uInt N, const dftfe::uInt numberLocalDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, const std::vector< double > &eigenValues, const double fermiEnergy, std::vector< double > &densityMatDerFermiEnergy, elpaScalaManager &elpaScala, const dftParameters &dftParams)
Compute first order response in density matrix with respect to perturbation in the Hamiltonian.
@ DEVICE
Definition MemorySpaceType.h:36
Definition pseudoPotentialToDftfeConverter.cc:34
std::uint32_t uInt
Definition TypeConfig.h:10