DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements
Loading...
Searching...
No Matches
linearAlgebraOperationsInternal.h
Go to the documentation of this file.
1// ---------------------------------------------------------------------
2//
3// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
4// authors.
5//
6// This file is part of the DFT-FE code.
7//
8// The DFT-FE code is free software; you can use it, redistribute
9// it, and/or modify it under the terms of the GNU Lesser General
10// Public License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12// The full text of the license can be found in the file LICENSE at
13// the top level of the DFT-FE distribution.
14//
15// ---------------------------------------------------------------------
16
17#ifndef linearAlgebraOperationsInternal_h
18#define linearAlgebraOperationsInternal_h
19
20#include <headers.h>
21#include <operator.h>
22#include "process_grid.h"
23#include "scalapackWrapper.h"
24#include "dftParameters.h"
25#include <BLASWrapper.h>
26#include <elpa/elpa.h>
27#include <unordered_map>
28#include <dftUtils.h>
29namespace dftfe
30{
32 {
33 /**
34 * @brief Contains internal functions used in linearAlgebraOperations
35 *
36 * @author Sambit Das
37 */
38 namespace internal
39 {
40 /** @brief setup ELPA parameters.
41 *
42 */
43 void
45 const MPI_Comm &mpi_communicator,
46 MPI_Comm & processGridCommunicatorActive,
47 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
48 const unsigned int na,
49 const unsigned int nev,
50 const unsigned int blockSize,
51 elpa_t & elpaHandle,
52 const dftParameters & dftParams);
53
54 /** @brief Wrapper function to create a two dimensional processor grid for a square matrix in
55 * dftfe::ScaLAPACKMatrix storage format.
56 *
57 */
58 void
60 const MPI_Comm & mpi_communicator,
61 const unsigned size,
62 std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
63 const dftParameters & dftParams,
64 const bool useOnlyThumbRule = false);
65
66 /** @brief Wrapper function to create a two dimensional processor grid for a rectangular matrix in
67 * dftfe::ScaLAPACKMatrix storage format.
68 *
69 */
70 void
72 const MPI_Comm & mpi_communicator,
73 const unsigned sizeRows,
74 const unsigned sizeColumns,
75 std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
76 const dftParameters & dftParams);
77
78
79 /** @brief Creates global row/column id to local row/column ids for dftfe::ScaLAPACKMatrix
80 *
81 */
82 template <typename T>
83 void
85 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
86 const dftfe::ScaLAPACKMatrix<T> & mat,
87 std::unordered_map<unsigned int, unsigned int> & globalToLocalRowIdMap,
88 std::unordered_map<unsigned int, unsigned int>
89 &globalToLocalColumnIdMap);
90
91
92 /** @brief Mpi all reduce of ScaLAPACKMat across a given inter communicator.
93 * Used for band parallelization.
94 *
95 */
96 template <typename T>
97 void
99 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
101 const MPI_Comm & interComm);
102
103
104
105 /** @brief scale a ScaLAPACKMat with a scalar
106 *
107 *
108 */
109 template <typename T>
110 void
112 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
113 const std::shared_ptr<
115 & BLASWrapperPtr,
117 const T scalar);
118
119
120 /** @brief MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root.
121 * Used for band parallelization.
122 *
123 */
124 template <typename T>
125 void
127 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
129 const MPI_Comm & interComm,
130 const unsigned int broadcastRoot);
131
132 /** @brief Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix.
133 * X^{T} is the subspaceVectorsArray stored in the column major format (N
134 * x M). Sc is the overlapMatPar.
135 *
136 * The overlap matrix computation and filling is done in a blocked
137 * approach which avoids creation of full serial overlap matrix memory,
138 * and also avoids creation of another full X memory.
139 *
140 */
141 template <typename T>
142 void
144 const T *X,
145 const std::shared_ptr<
147 & BLASWrapperPtr,
148 const unsigned int XLocalSize,
149 const unsigned int numberVectors,
150 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
151 const MPI_Comm & interBandGroupComm,
152 const MPI_Comm & mpiComm,
153 dftfe::ScaLAPACKMatrix<T> & overlapMatPar,
154 const dftParameters & dftParams);
155
156
157 /** @brief Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix.
158 * X^{T} is the subspaceVectorsArray stored in the column major format (N
159 * x M). Sc is the overlapMatPar.
160 *
161 * The overlap matrix computation and filling is done in a blocked
162 * approach which avoids creation of full serial overlap matrix memory,
163 * and also avoids creation of another full X memory.
164 *
165 */
166 template <typename T, typename TLowPrec>
167 void
169 const T *X,
170 const std::shared_ptr<
172 & BLASWrapperPtr,
173 const unsigned int XLocalSize,
174 const unsigned int numberVectors,
175 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
176 const MPI_Comm & interBandGroupComm,
177 const MPI_Comm & mpiComm,
178 dftfe::ScaLAPACKMatrix<T> & overlapMatPar,
179 const dftParameters & dftParams);
180
181
182 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
183 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
184 *
185 * The subspace rotation inside this function is done in a blocked
186 * approach which avoids creation of full serial rotation matrix memory,
187 * and also avoids creation of another full subspaceVectorsArray memory.
188 * subspaceVectorsArrayLocalSize=N*M
189 *
190 */
191 template <typename T>
192 void
194 T *subspaceVectorsArray,
195 const std::shared_ptr<
197 & BLASWrapperPtr,
198 const unsigned int subspaceVectorsArrayLocalSize,
199 const unsigned int N,
200 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
201 const MPI_Comm & interBandGroupComm,
202 const MPI_Comm & mpiComm,
203 const dftfe::ScaLAPACKMatrix<T> & rotationMatPar,
204 const dftParameters & dftParams,
205 const bool rotationMatTranspose = false,
206 const bool isRotationMatLowerTria = false,
207 const bool doCommAfterBandParal = true);
208
209 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
210 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
211 *
212 * The subspace rotation inside this function is done in a blocked
213 * approach which avoids creation of full serial rotation matrix memory,
214 * and also avoids creation of another full subspaceVectorsArray memory.
215 * subspaceVectorsArrayLocalSize=N*M
216 *
217 */
218 template <typename T, typename TLowPrec>
219 void
221 T *subspaceVectorsArray,
222 const std::shared_ptr<
224 & BLASWrapperPtr,
225 const unsigned int subspaceVectorsArrayLocalSize,
226 const unsigned int N,
227 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
228 const MPI_Comm & interBandGroupComm,
229 const MPI_Comm & mpiComm,
230 const dftfe::ScaLAPACKMatrix<T> & rotationMatPar,
231 const dftParameters & dftParams,
232 const bool rotationMatTranspose = false,
233 const bool doCommAfterBandParal = true);
234
235
236 /** @brief Computes Y^{T}=Q*X^{T}.
237 *
238 * X^{T} is stored in the column major format (N x M). Q is extracted from
239 * the supplied QMat as Q=QMat{1:numberTopVectors}. If QMat is in column
240 * major format set QMatTranspose=false, otherwise set to true if in row
241 * major format. The dimensions (in row major format) of QMat could be
242 * either a) (N x numberTopVectors) or b) (N x N) where
243 * numberTopVectors!=N. In this case it is assumed that Q is stored in the
244 * first numberTopVectors columns of QMat. The subspace rotation inside
245 * this function is done in a blocked approach which avoids creation of
246 * full serial rotation matrix memory, and also avoids creation of another
247 * full X memory. subspaceVectorsArrayLocalSize=N*M
248 *
249 */
250 template <typename T>
251 void
253 const T *X,
254 const std::shared_ptr<
256 & BLASWrapperPtr,
257 T * Y,
258 const unsigned int subspaceVectorsArrayLocalSize,
259 const unsigned int N,
260 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
261 const unsigned int numberTopVectors,
262 const MPI_Comm & interBandGroupComm,
263 const MPI_Comm & mpiComm,
264 const dftfe::ScaLAPACKMatrix<T> & QMat,
265 const dftParameters & dftParams,
266 const bool QMatTranspose = false);
267
268 /** @brief Computes Y^{T}=Q*X^{T}.
269 *
270 * X^{T} is stored in the column major format (N x M). Q is extracted from
271 * the supplied QMat as Q=QMat{1:numberTopVectors}. If QMat is in column
272 * major format set QMatTranspose=false, otherwise set to true if in row
273 * major format. The dimensions (in row major format) of QMat could be
274 * either a) (N x numberTopVectors) or b) (N x N) where
275 * numberTopVectors!=N. In this case it is assumed that Q is stored in the
276 * first numberTopVectors columns of QMat. The subspace rotation inside
277 * this function is done in a blocked approach which avoids creation of
278 * full serial rotation matrix memory, and also avoids creation of another
279 * full X memory. subspaceVectorsArrayLocalSize=N*M
280 *
281 */
282 template <typename T, typename TLowPrec>
283 void
285 const T *X,
286 const std::shared_ptr<
288 & BLASWrapperPtr,
289 T * Y,
290 const unsigned int subspaceVectorsArrayLocalSize,
291 const unsigned int N,
292 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
293 const unsigned int numberTopVectors,
294 const MPI_Comm & interBandGroupComm,
295 const MPI_Comm & mpiComm,
296 const dftfe::ScaLAPACKMatrix<T> & QMat,
297 const dftParameters & dftParams,
298 const bool QMatTranspose = false);
299
300
301 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
302 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
303 *
304 * The subspace rotation inside this function is done in a blocked
305 * approach which avoids creation of full serial rotation matrix memory,
306 * and also avoids creation of another full subspaceVectorsArray memory.
307 * subspaceVectorsArrayLocalSize=N*M
308 *
309 */
310 template <typename T, typename TLowPrec>
311 void
313 T *subspaceVectorsArray,
314 const std::shared_ptr<
316 & BLASWrapperPtr,
317 const unsigned int subspaceVectorsArrayLocalSize,
318 const unsigned int N,
319 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
320 const MPI_Comm & interBandGroupComm,
321 const MPI_Comm & mpiComm,
322 const dftfe::ScaLAPACKMatrix<T> & rotationMatPar,
323 const dftParameters & dftParams,
324 const bool rotationMatTranspose = false,
325 const bool doCommAfterBandParal = true);
326 } // namespace internal
327 } // namespace linearAlgebraOperations
328} // namespace dftfe
329#endif
Scalapack wrapper adapted from dealii library and extended implementation to complex datatype.
Definition scalapackWrapper.h:37
Namespace which declares the input parameters and the functions to parse them from the input paramete...
Definition dftParameters.h:35
Definition BLASWrapper.h:35
Contains internal functions used in linearAlgebraOperations.
Definition linearAlgebraOperationsInternal.h:39
void createProcessGridSquareMatrix(const MPI_Comm &mpi_communicator, const unsigned size, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams, const bool useOnlyThumbRule=false)
Wrapper function to create a two dimensional processor grid for a square matrix in dftfe::ScaLAPACKMa...
void subspaceRotationSpectrumSplitMixedPrec(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false)
Computes Y^{T}=Q*X^{T}.
void scaleScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, dftfe::ScaLAPACKMatrix< T > &mat, const T scalar)
scale a ScaLAPACKMat with a scalar
void fillParallelOverlapMatrix(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int XLocalSize, const unsigned int numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams)
Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray sto...
void broadcastAcrossInterCommScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm, const unsigned int broadcastRoot)
MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root....
void subspaceRotation(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool isRotationMatLowerTria=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void subspaceRotationSpectrumSplit(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false)
Computes Y^{T}=Q*X^{T}.
void subspaceRotationCGSMixedPrec(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void createProcessGridRectangularMatrix(const MPI_Comm &mpi_communicator, const unsigned sizeRows, const unsigned sizeColumns, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams)
Wrapper function to create a two dimensional processor grid for a rectangular matrix in dftfe::ScaLAP...
void fillParallelOverlapMatrixMixedPrec(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int XLocalSize, const unsigned int numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams)
Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray sto...
void sumAcrossInterCommScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm)
Mpi all reduce of ScaLAPACKMat across a given inter communicator. Used for band parallelization.
void subspaceRotationMixedPrec(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const unsigned int subspaceVectorsArrayLocalSize, const unsigned int N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void setupELPAHandleParameters(const MPI_Comm &mpi_communicator, MPI_Comm &processGridCommunicatorActive, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const unsigned int na, const unsigned int nev, const unsigned int blockSize, elpa_t &elpaHandle, const dftParameters &dftParams)
setup ELPA parameters.
void createGlobalToLocalIdMapsScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::ScaLAPACKMatrix< T > &mat, std::unordered_map< unsigned int, unsigned int > &globalToLocalRowIdMap, std::unordered_map< unsigned int, unsigned int > &globalToLocalColumnIdMap)
Creates global row/column id to local row/column ids for dftfe::ScaLAPACKMatrix.
Contains linear algebra functions used in the implementation of an eigen solver.
Definition linearAlgebraOperations.h:502
Definition pseudoPotentialToDftfeConverter.cc:34