DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements
|
Contains internal functions used in linearAlgebraOperations. More...
Functions | |
void | setupELPAHandleParameters (const MPI_Comm &mpi_communicator, MPI_Comm &processGridCommunicatorActive, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int na, const unsigned int nev, const unsigned int blockSize, elpa_t &elpaHandle, const dftParameters &dftParams) |
Sets up ELPA parameters. | |
void | createProcessGridSquareMatrix (const MPI_Comm &mpi_communicator, const unsigned size, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams, const bool useOnlyThumbRule=false) |
Wrapper function to create a two-dimensional processor grid for a square matrix in dftfe::ScaLAPACKMatrix storage format. | |
void | createProcessGridRectangularMatrix (const MPI_Comm &mpi_communicator, const unsigned sizeRows, const unsigned sizeColumns, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams) |
Wrapper function to create a two-dimensional processor grid for a rectangular matrix in dftfe::ScaLAPACKMatrix storage format. | |
template<typename T> | |
void | createGlobalToLocalIdMapsScaLAPACKMat (const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::ScaLAPACKMatrix< T > &mat, std::unordered_map< unsigned int, unsigned int > &globalToLocalRowIdMap, std::unordered_map< unsigned int, unsigned int > &globalToLocalColumnIdMap) |
Creates maps from global row/column ids to local row/column ids for a dftfe::ScaLAPACKMatrix. | |
template<typename T> | |
void | sumAcrossInterCommScaLAPACKMat (const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm) |
MPI all-reduce of a ScaLAPACKMat across a given inter communicator. Used for band parallelization. | |
template<typename T> | |
void | scaleScaLAPACKMat (const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, dftfe::ScaLAPACKMatrix< T > &mat, const T scalar) |
Scales a ScaLAPACKMat by a scalar. | |
template<typename T> | |
void | broadcastAcrossInterCommScaLAPACKMat (const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm, const unsigned int broadcastRoot) |
MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root. Used for band parallelization. | |
template<typename T> | |
void | fillParallelOverlapMatrix (const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int XLocalSize, const unsigned int numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams) |
Computes Sc=X^{T}*Xc and stores the result in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Sc is the overlapMatPar. | |
template<typename T, typename TLowPrec> | |
void | fillParallelOverlapMatrixMixedPrec (const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int XLocalSize, const unsigned int numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams) |
Computes Sc=X^{T}*Xc and stores the result in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Sc is the overlapMatPar. | |
template<typename T> | |
void | subspaceRotation (T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool isRotationMatLowerTria=false, const bool doCommAfterBandParal=true) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N). | |
template<typename T, typename TLowPrec> | |
void | subspaceRotationMixedPrec (T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N). | |
template<typename T> | |
void | subspaceRotationSpectrumSplit (const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false) |
Computes Y^{T}=Q*X^{T}. | |
template<typename T, typename TLowPrec> | |
void | subspaceRotationSpectrumSplitMixedPrec (const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false) |
Computes Y^{T}=Q*X^{T}. | |
template<typename T, typename TLowPrec> | |
void | subspaceRotationCGSMixedPrec (T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N). | |
Contains internal functions used in linearAlgebraOperations.
void dftfe::linearAlgebraOperations::internal::broadcastAcrossInterCommScaLAPACKMat | ( | const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, |
dftfe::ScaLAPACKMatrix< T > & | mat, | ||
const MPI_Comm & | interComm, | ||
const unsigned int | broadcastRoot ) |
MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root. Used for band parallelization.
void dftfe::linearAlgebraOperations::internal::createGlobalToLocalIdMapsScaLAPACKMat | ( | const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, |
const dftfe::ScaLAPACKMatrix< T > & | mat, | ||
std::unordered_map< unsigned int, unsigned int > & | globalToLocalRowIdMap, | ||
std::unordered_map< unsigned int, unsigned int > & | globalToLocalColumnIdMap ) |
Creates maps from global row/column ids to local row/column ids for a dftfe::ScaLAPACKMatrix.
void dftfe::linearAlgebraOperations::internal::createProcessGridRectangularMatrix | ( | const MPI_Comm & | mpi_communicator, |
const unsigned | sizeRows, | ||
const unsigned | sizeColumns, | ||
std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const dftParameters & | dftParams ) |
Wrapper function to create a two-dimensional processor grid for a rectangular matrix in dftfe::ScaLAPACKMatrix storage format.
void dftfe::linearAlgebraOperations::internal::createProcessGridSquareMatrix | ( | const MPI_Comm & | mpi_communicator, |
const unsigned | size, | ||
std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const dftParameters & | dftParams, | ||
const bool | useOnlyThumbRule = false ) |
Wrapper function to create a two-dimensional processor grid for a square matrix in dftfe::ScaLAPACKMatrix storage format.
void dftfe::linearAlgebraOperations::internal::fillParallelOverlapMatrix | ( | const T * | X, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
const unsigned int | XLocalSize, | ||
const unsigned int | numberVectors, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
dftfe::ScaLAPACKMatrix< T > & | overlapMatPar, | ||
const dftParameters & | dftParams ) |
Computes Sc=X^{T}*Xc and stores the result in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Sc is the overlapMatPar.
The overlap matrix is computed and filled using a blocked approach, which avoids allocating memory for the full serial overlap matrix and also avoids allocating a second array of the full size of X.
void dftfe::linearAlgebraOperations::internal::fillParallelOverlapMatrixMixedPrec | ( | const T * | X, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
const unsigned int | XLocalSize, | ||
const unsigned int | numberVectors, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
dftfe::ScaLAPACKMatrix< T > & | overlapMatPar, | ||
const dftParameters & | dftParams ) |
Computes Sc=X^{T}*Xc and stores the result in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Sc is the overlapMatPar.
The overlap matrix is computed and filled using a blocked approach, which avoids allocating memory for the full serial overlap matrix and also avoids allocating a second array of the full size of X.
void dftfe::linearAlgebraOperations::internal::scaleScaLAPACKMat | ( | const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
dftfe::ScaLAPACKMatrix< T > & | mat, | ||
const T | scalar ) |
Scales a ScaLAPACKMat by a scalar.
void dftfe::linearAlgebraOperations::internal::setupELPAHandleParameters | ( | const MPI_Comm & | mpi_communicator, |
MPI_Comm & | processGridCommunicatorActive, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const unsigned int | na, | ||
const unsigned int | nev, | ||
const unsigned int | blockSize, | ||
elpa_t & | elpaHandle, | ||
const dftParameters & | dftParams ) |
Sets up ELPA parameters.
void dftfe::linearAlgebraOperations::internal::subspaceRotation | ( | T * | subspaceVectorsArray, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
const unsigned int | subspaceVectorsArrayLocalSize, | ||
const unsigned int | N, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
const dftfe::ScaLAPACKMatrix< T > & | rotationMatPar, | ||
const dftParameters & | dftParams, | ||
const bool | rotationMatTranspose = false, | ||
const bool | isRotationMatLowerTria = false, | ||
const bool | doCommAfterBandParal = true ) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N).
The subspace rotation inside this function is done using a blocked approach, which avoids allocating memory for the full serial rotation matrix and also avoids allocating a second array of the full size of subspaceVectorsArray. Here subspaceVectorsArrayLocalSize=N*M.
void dftfe::linearAlgebraOperations::internal::subspaceRotationCGSMixedPrec | ( | T * | subspaceVectorsArray, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
const unsigned int | subspaceVectorsArrayLocalSize, | ||
const unsigned int | N, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
const dftfe::ScaLAPACKMatrix< T > & | rotationMatPar, | ||
const dftParameters & | dftParams, | ||
const bool | rotationMatTranspose = false, | ||
const bool | doCommAfterBandParal = true ) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N).
The subspace rotation inside this function is done using a blocked approach, which avoids allocating memory for the full serial rotation matrix and also avoids allocating a second array of the full size of subspaceVectorsArray. Here subspaceVectorsArrayLocalSize=N*M.
void dftfe::linearAlgebraOperations::internal::subspaceRotationMixedPrec | ( | T * | subspaceVectorsArray, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
const unsigned int | subspaceVectorsArrayLocalSize, | ||
const unsigned int | N, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
const dftfe::ScaLAPACKMatrix< T > & | rotationMatPar, | ||
const dftParameters & | dftParams, | ||
const bool | rotationMatTranspose = false, | ||
const bool | doCommAfterBandParal = true ) |
Computes X^{T}=Q*X^{T} in place. X^{T} is the subspaceVectorsArray stored in the column major format (N x M). Q is rotationMatPar (N x N).
The subspace rotation inside this function is done using a blocked approach, which avoids allocating memory for the full serial rotation matrix and also avoids allocating a second array of the full size of subspaceVectorsArray. Here subspaceVectorsArrayLocalSize=N*M.
void dftfe::linearAlgebraOperations::internal::subspaceRotationSpectrumSplit | ( | const T * | X, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
T * | Y, | ||
const unsigned int | subspaceVectorsArrayLocalSize, | ||
const unsigned int | N, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const unsigned int | numberTopVectors, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
const dftfe::ScaLAPACKMatrix< T > & | QMat, | ||
const dftParameters & | dftParams, | ||
const bool | QMatTranspose = false ) |
Computes Y^{T}=Q*X^{T}.
X^{T} is stored in the column major format (N x M). Q is extracted from the supplied QMat as Q=QMat{1:numberTopVectors}. Set QMatTranspose=false if QMat is in column major format, and QMatTranspose=true if it is in row major format. The dimensions (in row major format) of QMat can be either a) (N x numberTopVectors) or b) (N x N) where numberTopVectors!=N. In case b) it is assumed that Q is stored in the first numberTopVectors columns of QMat. The subspace rotation inside this function is done using a blocked approach, which avoids allocating memory for the full serial rotation matrix and also avoids allocating a second array of the full size of X. Here subspaceVectorsArrayLocalSize=N*M.
void dftfe::linearAlgebraOperations::internal::subspaceRotationSpectrumSplitMixedPrec | ( | const T * | X, |
const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > & | BLASWrapperPtr, | ||
T * | Y, | ||
const unsigned int | subspaceVectorsArrayLocalSize, | ||
const unsigned int | N, | ||
const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, | ||
const unsigned int | numberTopVectors, | ||
const MPI_Comm & | interBandGroupComm, | ||
const MPI_Comm & | mpiComm, | ||
const dftfe::ScaLAPACKMatrix< T > & | QMat, | ||
const dftParameters & | dftParams, | ||
const bool | QMatTranspose = false ) |
Computes Y^{T}=Q*X^{T}.
X^{T} is stored in the column major format (N x M). Q is extracted from the supplied QMat as Q=QMat{1:numberTopVectors}. Set QMatTranspose=false if QMat is in column major format, and QMatTranspose=true if it is in row major format. The dimensions (in row major format) of QMat can be either a) (N x numberTopVectors) or b) (N x N) where numberTopVectors!=N. In case b) it is assumed that Q is stored in the first numberTopVectors columns of QMat. The subspace rotation inside this function is done using a blocked approach, which avoids allocating memory for the full serial rotation matrix and also avoids allocating a second array of the full size of X. Here subspaceVectorsArrayLocalSize=N*M.
void dftfe::linearAlgebraOperations::internal::sumAcrossInterCommScaLAPACKMat | ( | const std::shared_ptr< const dftfe::ProcessGrid > & | processGrid, |
dftfe::ScaLAPACKMatrix< T > & | mat, | ||
const MPI_Comm & | interComm ) |
MPI all-reduce of a ScaLAPACKMat across a given inter communicator. Used for band parallelization.