17#if defined(DFTFE_WITH_DEVICE)
18# ifndef linearAlgebraOperationsDevice_h
19# define linearAlgebraOperationsDevice_h
35 dsyevd_(
const char * jobz,
37 const unsigned int *n,
39 const unsigned int *lda,
42 const unsigned int *lwork,
44 const unsigned int *liwork,
48 zheevd_(
const char * jobz,
50 const unsigned int * n,
51 std::complex<double> *A,
52 const unsigned int * lda,
54 std::complex<double> *work,
55 const unsigned int * lwork,
57 const unsigned int * lrwork,
59 const unsigned int * liwork,
83 chebyshevFilterOverlapComputeCommunication(
84 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
97 template <
typename T1,
typename T2>
99 reformulatedChebyshevFilterOverlapComputeCommunication(
101 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
103 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
104 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
106 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
108 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
110 dftfe::linearAlgebra::MultiVector<T1, dftfe::utils::MemorySpace::DEVICE>
112 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
114 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
116 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
118 dftfe::linearAlgebra::MultiVector<T2, dftfe::utils::MemorySpace::DEVICE>
120 std::vector<double> eigenvalues,
121 const unsigned int m,
125 const bool approxOverlapMatrix);
132 fillParallelOverlapMatScalapack(
133 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
135 distributedDeviceVec<dataTypes::number> & XBlock,
136 distributedDeviceVec<dataTypes::number> & OXBlock,
137 const unsigned int M,
138 const unsigned int N,
140 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
142 const MPI_Comm & mpiCommDomain,
143 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
144 const MPI_Comm & interBandGroupComm,
145 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
146 dftfe::ScaLAPACKMatrix<dataTypes::number> & overlapMatPar,
147 const dftParameters & dftParams);
156 fillParallelOverlapMatScalapackAsyncComputeCommun(
157 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
159 distributedDeviceVec<dataTypes::number> & XBlock,
160 distributedDeviceVec<dataTypes::number> & OXBlock,
161 const unsigned int M,
162 const unsigned int N,
164 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
166 const MPI_Comm & mpiCommDomain,
167 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
168 const MPI_Comm & interBandGroupComm,
169 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
170 dftfe::ScaLAPACKMatrix<dataTypes::number> & overlapMatPar,
171 const dftParameters & dftParams);
180 fillParallelOverlapMatMixedPrecScalapackAsyncComputeCommun(
181 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
183 distributedDeviceVec<dataTypes::number> & XBlock,
184 distributedDeviceVec<dataTypes::number> & OXBlock,
185 const unsigned int M,
186 const unsigned int N,
187 const unsigned int Noc,
189 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
191 const MPI_Comm & mpiCommDomain,
192 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
193 const MPI_Comm & interBandGroupComm,
194 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
195 dftfe::ScaLAPACKMatrix<dataTypes::number> & overlapMatPar,
196 const dftParameters & dftParams);
203 fillParallelOverlapMatMixedPrecCommunScalapackAsyncComputeCommun(
204 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
206 distributedDeviceVec<dataTypes::number> & XBlock,
207 distributedDeviceVec<dataTypes::number> & OXBlock,
208 const unsigned int M,
209 const unsigned int N,
210 const unsigned int Noc,
212 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
214 const MPI_Comm & mpiCommDomain,
215 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
216 const MPI_Comm & interBandGroupComm,
217 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
218 dftfe::ScaLAPACKMatrix<dataTypes::number> & overlapMatPar,
219 const dftParameters & dftParams);
226 fillParallelOverlapMatMixedPrecScalapack(
227 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
229 distributedDeviceVec<dataTypes::number> & XBlock,
230 distributedDeviceVec<dataTypes::number> & OXBlock,
231 const unsigned int M,
232 const unsigned int N,
233 const unsigned int Noc,
235 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
237 const MPI_Comm & mpiCommDomain,
238 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
239 const MPI_Comm & interBandGroupComm,
240 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
241 dftfe::ScaLAPACKMatrix<dataTypes::number> & overlapMatPar,
242 const dftParameters & dftParams);
250 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
251 elpaScalaManager & elpaScala,
253 distributedDeviceVec<dataTypes::number> & Xb,
254 distributedDeviceVec<dataTypes::number> & HXb,
255 const unsigned int M,
256 const unsigned int N,
257 const MPI_Comm & mpiCommParent,
258 const MPI_Comm & mpiCommDomain,
259 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
260 const MPI_Comm & interBandGroupComm,
262 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
264 const dftParameters &dftParams,
265 const bool useMixedPrecOverall =
false);
268 subspaceRotationScalapack(
270 const unsigned int M,
271 const unsigned int N,
273 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
275 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
276 const MPI_Comm & mpiCommDomain,
277 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
278 const MPI_Comm & interBandGroupComm,
279 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
280 const dftParameters & dftParams,
281 const bool rotationMatTranspose =
false,
282 const bool isRotationMatLowerTria =
false);
287 subspaceRotationCGSMixedPrecScalapack(
289 const unsigned int M,
290 const unsigned int N,
292 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
294 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
295 const MPI_Comm & mpiCommDomain,
296 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
297 const MPI_Comm & interBandGroupComm,
298 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
299 const dftParameters & dftParams,
300 const bool rotationMatTranspose =
false);
304 subspaceRotationRRMixedPrecScalapack(
306 const unsigned int M,
307 const unsigned int N,
309 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
311 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
312 const MPI_Comm & mpiCommDomain,
313 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
314 const MPI_Comm & interBandGroupComm,
315 const dftfe::ScaLAPACKMatrix<dataTypes::number> &rotationMatPar,
316 const dftParameters & dftParams,
317 const bool rotationMatTranspose =
false);
322 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
323 elpaScalaManager & elpaScala,
325 distributedDeviceVec<dataTypes::number> & Xb,
326 distributedDeviceVec<dataTypes::number> & HXb,
327 const unsigned int M,
328 const unsigned int N,
329 const MPI_Comm & mpiCommParent,
330 const MPI_Comm & mpiCommDomain,
331 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
332 const MPI_Comm & interBandGroupComm,
333 std::vector<double> & eigenValues,
335 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
337 const dftParameters &dftParams,
338 const bool useMixedPrecOverall =
false);
342 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
343 elpaScalaManager & elpaScala,
345 distributedDeviceVec<dataTypes::number> & Xb,
346 distributedDeviceVec<dataTypes::number> & HXb,
347 const unsigned int M,
348 const unsigned int N,
349 const MPI_Comm & mpiCommParent,
350 const MPI_Comm & mpiCommDomain,
351 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
352 const MPI_Comm & interBandGroupComm,
353 std::vector<double> & eigenValues,
355 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
357 const dftParameters &dftParams,
358 const bool useMixedPrecOverall =
false);
364 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
366 distributedDeviceVec<dataTypes::number> & Xb,
367 distributedDeviceVec<dataTypes::number> & HXb,
368 const unsigned int M,
369 const unsigned int N,
370 const MPI_Comm & mpiCommParent,
371 const MPI_Comm & mpiCommDomain,
372 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
373 const MPI_Comm & interBandGroupComm,
374 const std::vector<double> &eigenValues,
375 const double fermiEnergy,
376 std::vector<double> & densityMatDerFermiEnergy,
377 dftfe::elpaScalaManager & elpaScala,
379 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
381 const dftParameters &dftParams);
385 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
387 distributedDeviceVec<dataTypes::number> & Xb,
388 distributedDeviceVec<dataTypes::number> & HXb,
389 const unsigned int M,
390 const unsigned int N,
391 const std::vector<double> & eigenValues,
392 const MPI_Comm & mpiCommParent,
393 const MPI_Comm & mpiCommDomain,
394 const MPI_Comm & interBandGroupComm,
396 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
398 std::vector<double> &residualNorm,
399 const dftParameters &dftParams,
400 const bool useBandParal =
false);
403 XtHX(operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
405 distributedDeviceVec<dataTypes::number> & XBlock,
406 distributedDeviceVec<dataTypes::number> & HXBlock,
407 const unsigned int M,
408 const unsigned int N,
410 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
412 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
413 dftfe::ScaLAPACKMatrix<dataTypes::number> & projHamPar,
414 utils::DeviceCCLWrapper &devicecclMpiCommDomain,
415 const MPI_Comm & mpiCommDomain,
416 const MPI_Comm & interBandGroupComm,
417 const dftParameters & dftParams,
418 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
421 XtHXMixedPrecOverlapComputeCommun(
422 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
424 distributedDeviceVec<dataTypes::number> & XBlock,
425 distributedDeviceVec<dataTypes::number> & HXBlock,
426 const unsigned int M,
427 const unsigned int N,
428 const unsigned int Noc,
430 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
432 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
433 dftfe::ScaLAPACKMatrix<dataTypes::number> & projHamPar,
434 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
435 const MPI_Comm & mpiCommDomain,
436 const MPI_Comm & interBandGroupComm,
437 const dftParameters & dftParams,
438 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
441 XtHXOverlapComputeCommun(
442 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
444 distributedDeviceVec<dataTypes::number> & XBlock,
445 distributedDeviceVec<dataTypes::number> & HXBlock,
446 const unsigned int M,
447 const unsigned int N,
449 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
451 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
452 dftfe::ScaLAPACKMatrix<dataTypes::number> & projHamPar,
453 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
454 const MPI_Comm & mpiCommDomain,
455 const MPI_Comm & interBandGroupComm,
456 const dftParameters & dftParams,
457 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
460 XtHXMixedPrecCommunOverlapComputeCommun(
461 operatorDFTClass<dftfe::utils::MemorySpace::DEVICE> &operatorMatrix,
463 distributedDeviceVec<dataTypes::number> & XBlock,
464 distributedDeviceVec<dataTypes::number> & HXBlock,
465 const unsigned int M,
466 const unsigned int N,
467 const unsigned int Noc,
469 dftfe::linearAlgebra::BLASWrapper<dftfe::utils::MemorySpace::DEVICE>>
471 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
472 dftfe::ScaLAPACKMatrix<dataTypes::number> & projHamPar,
473 utils::DeviceCCLWrapper & devicecclMpiCommDomain,
474 const MPI_Comm & mpiCommDomain,
475 const MPI_Comm & interBandGroupComm,
476 const dftParameters & dftParams,
477 const bool onlyHPrimePartForFirstOrderDensityMatResponse =
false);
double number
Definition dftfeDataTypes.h:44
Definition linearAlgebraOperationsDeviceKernels.h:9
void rayleighRitzGEP(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, elpaScalaManager &elpaScala, T *X, const unsigned int numberComponents, const unsigned int numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, std::vector< double > &eigenValues, const bool useMixedPrec, const dftParameters &dftParams)
Compute Rayleigh-Ritz projection (serial version using LAPACK, parallel version using ScaLAPACK)
void densityMatrixEigenBasisFirstOrderResponse(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const unsigned int N, const unsigned int numberLocalDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, const std::vector< double > &eigenValues, const double fermiEnergy, std::vector< double > &densityMatDerFermiEnergy, elpaScalaManager &elpaScala, const dftParameters &dftParams)
Compute first order response in density matrix with respect to perturbation in the Hamiltonian....
void XtHX(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dataTypes::number *X, const unsigned int numberComponents, const unsigned int numberLocalDofs, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, const dftParameters &dftParams, std::vector< dataTypes::number > &ProjHam)
Compute projection of the operator into a subspace spanned by a given orthogonal basis HProjConj=X^{T...
void computeEigenResidualNorm(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const std::vector< double > &eigenValues, const unsigned int numberComponents, const unsigned int numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &mpiCommDomain, const MPI_Comm &interBandGroupComm, std::vector< double > &residualNorm, const dftParameters &dftParams)
Compute residual norm associated with eigenValue problem of the given operator.
void rayleighRitz(operatorDFTClass< dftfe::utils::MemorySpace::HOST > &operatorMatrix, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, elpaScalaManager &elpaScala, T *X, const unsigned int numberComponents, const unsigned int numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, std::vector< double > &eigenValues, const dftParameters &dftParams, const bool doCommAfterBandParal=true)
Compute Rayleigh-Ritz projection (serial version using LAPACK, parallel version using ScaLAPACK)
unsigned int pseudoGramSchmidtOrthogonalization(elpaScalaManager &elpaScala, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *X, const unsigned int numberComponents, const unsigned int numberDofs, const MPI_Comm &mpiCommParent, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiCommDomain, const bool useMixedPrec, const dftParameters &dftParams)
Orthogonalize given subspace using Pseudo-Gram-Schmidt orthogonalization (serial version using LAPACK...
@ DEVICE
Definition MemorySpaceType.h:36
Definition pseudoPotentialToDftfeConverter.cc:34