DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements
Loading...
Searching...
No Matches
linearAlgebraOperationsInternal.h
Go to the documentation of this file.
1// ---------------------------------------------------------------------
2//
3// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
4// authors.
5//
6// This file is part of the DFT-FE code.
7//
8// The DFT-FE code is free software; you can use it, redistribute
9// it, and/or modify it under the terms of the GNU Lesser General
10// Public License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12// The full text of the license can be found in the file LICENSE at
13// the top level of the DFT-FE distribution.
14//
15// ---------------------------------------------------------------------
16
17#ifndef linearAlgebraOperationsInternal_h
18#define linearAlgebraOperationsInternal_h
19
20#include <headers.h>
21#include <operator.h>
22#include "process_grid.h"
23#include "scalapackWrapper.h"
24#include "dftParameters.h"
25#include <BLASWrapper.h>
26#include <elpa/elpa.h>
27#include <unordered_map>
28#include <dftUtils.h>
29namespace dftfe
30{
32 {
33 /**
34 * @brief Contains internal functions used in linearAlgebraOperations
35 *
36 * @author Sambit Das
37 */
38 namespace internal
39 {
40 /** @brief setup ELPA parameters.
41 *
42 */
43 void
45 const MPI_Comm &mpi_communicator,
46 MPI_Comm &processGridCommunicatorActive,
47 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
48 const dftfe::uInt na,
49 const dftfe::uInt nev,
50 const dftfe::uInt blockSize,
51 elpa_t &elpaHandle,
52 const dftParameters &dftParams);
53
54 /** @brief Wrapper function to create a two dimensional processor grid for a square matrix in
55 * dftfe::ScaLAPACKMatrix storage format.
56 *
57 */
58 void
60 const MPI_Comm &mpi_communicator,
61 const dftfe::uInt size,
62 std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
63 const dftParameters &dftParams,
64 const bool useOnlyThumbRule = false);
65
66 /** @brief Wrapper function to create a two dimensional processor grid for a rectangular matrix in
67 * dftfe::ScaLAPACKMatrix storage format.
68 *
69 */
70 void
72 const MPI_Comm &mpi_communicator,
73 const dftfe::uInt sizeRows,
74 const dftfe::uInt sizeColumns,
75 std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
76 const dftParameters &dftParams);
77
78
79 /** @brief Creates global row/column id to local row/column ids for dftfe::ScaLAPACKMatrix
80 *
81 */
82 template <typename T>
83 void
85 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
87 std::unordered_map<dftfe::uInt, dftfe::uInt> &globalToLocalRowIdMap,
88 std::unordered_map<dftfe::uInt, dftfe::uInt> &globalToLocalColumnIdMap);
89
90
91 /** @brief Mpi all reduce of ScaLAPACKMat across a given inter communicator.
92 * Used for band parallelization.
93 *
94 */
95 template <typename T>
96 void
98 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
100 const MPI_Comm &interComm);
101
102
103
104 /** @brief scale a ScaLAPACKMat with a scalar
105 *
106 *
107 */
108 template <typename T>
109 void
111 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
112 const std::shared_ptr<
114 &BLASWrapperPtr,
116 const T scalar);
117
118
119 /** @brief MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root.
120 * Used for band parallelization.
121 *
122 */
123 template <typename T>
124 void
126 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
128 const MPI_Comm &interComm,
129 const dftfe::uInt broadcastRoot);
130
131 /** @brief Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix.
132 * X^{T} is the subspaceVectorsArray stored in the column major format (N
133 * x M). Sc is the overlapMatPar.
134 *
135 * The overlap matrix computation and filling is done in a blocked
136 * approach which avoids creation of full serial overlap matrix memory,
137 * and also avoids creation of another full X memory.
138 *
139 */
140 template <typename T>
141 void
143 const T *X,
144 const std::shared_ptr<
146 &BLASWrapperPtr,
147 const dftfe::uInt XLocalSize,
148 const dftfe::uInt numberVectors,
149 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
150 const MPI_Comm &interBandGroupComm,
151 const MPI_Comm &mpiComm,
152 dftfe::ScaLAPACKMatrix<T> &overlapMatPar,
153 const dftParameters &dftParams);
154
155
156 /** @brief Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix.
157 * X^{T} is the subspaceVectorsArray stored in the column major format (N
158 * x M). Sc is the overlapMatPar.
159 *
160 * The overlap matrix computation and filling is done in a blocked
161 * approach which avoids creation of full serial overlap matrix memory,
162 * and also avoids creation of another full X memory.
163 *
164 */
165 template <typename T, typename TLowPrec>
166 void
168 const T *X,
169 const std::shared_ptr<
171 &BLASWrapperPtr,
172 const dftfe::uInt XLocalSize,
173 const dftfe::uInt numberVectors,
174 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
175 const MPI_Comm &interBandGroupComm,
176 const MPI_Comm &mpiComm,
177 dftfe::ScaLAPACKMatrix<T> &overlapMatPar,
178 const dftParameters &dftParams);
179
180
181 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
182 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
183 *
184 * The subspace rotation inside this function is done in a blocked
185 * approach which avoids creation of full serial rotation matrix memory,
186 * and also avoids creation of another full subspaceVectorsArray memory.
187 * subspaceVectorsArrayLocalSize=N*M
188 *
189 */
190 template <typename T>
191 void
193 T *subspaceVectorsArray,
194 const std::shared_ptr<
196 &BLASWrapperPtr,
197 const dftfe::uInt subspaceVectorsArrayLocalSize,
198 const dftfe::uInt N,
199 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
200 const MPI_Comm &interBandGroupComm,
201 const MPI_Comm &mpiComm,
202 const dftfe::ScaLAPACKMatrix<T> &rotationMatPar,
203 const dftParameters &dftParams,
204 const bool rotationMatTranspose = false,
205 const bool isRotationMatLowerTria = false,
206 const bool doCommAfterBandParal = true);
207
208 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
209 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
210 *
211 * The subspace rotation inside this function is done in a blocked
212 * approach which avoids creation of full serial rotation matrix memory,
213 * and also avoids creation of another full subspaceVectorsArray memory.
214 * subspaceVectorsArrayLocalSize=N*M
215 *
216 */
217 template <typename T, typename TLowPrec>
218 void
220 T *subspaceVectorsArray,
221 const std::shared_ptr<
223 &BLASWrapperPtr,
224 const dftfe::uInt subspaceVectorsArrayLocalSize,
225 const dftfe::uInt N,
226 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
227 const MPI_Comm &interBandGroupComm,
228 const MPI_Comm &mpiComm,
229 const dftfe::ScaLAPACKMatrix<T> &rotationMatPar,
230 const dftParameters &dftParams,
231 const bool rotationMatTranspose = false,
232 const bool doCommAfterBandParal = true);
233
234
235 /** @brief Computes Y^{T}=Q*X^{T}.
236 *
237 * X^{T} is stored in the column major format (N x M). Q is extracted from
238 * the supplied QMat as Q=QMat{1:numberTopVectors}. If QMat is in column
239 * major format set QMatTranspose=false, otherwise set to true if in row
240 * major format. The dimensions (in row major format) of QMat could be
241 * either a) (N x numberTopVectors) or b) (N x N) where
242 * numberTopVectors!=N. In this case it is assumed that Q is stored in the
243 * first numberTopVectors columns of QMat. The subspace rotation inside
244 * this function is done in a blocked approach which avoids creation of
245 * full serial rotation matrix memory, and also avoids creation of another
246 * full X memory. subspaceVectorsArrayLocalSize=N*M
247 *
248 */
249 template <typename T>
250 void
252 const T *X,
253 const std::shared_ptr<
255 &BLASWrapperPtr,
256 T *Y,
257 const dftfe::uInt subspaceVectorsArrayLocalSize,
258 const dftfe::uInt N,
259 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
260 const dftfe::uInt numberTopVectors,
261 const MPI_Comm &interBandGroupComm,
262 const MPI_Comm &mpiComm,
263 const dftfe::ScaLAPACKMatrix<T> &QMat,
264 const dftParameters &dftParams,
265 const bool QMatTranspose = false);
266
267 /** @brief Computes Y^{T}=Q*X^{T}.
268 *
269 * X^{T} is stored in the column major format (N x M). Q is extracted from
270 * the supplied QMat as Q=QMat{1:numberTopVectors}. If QMat is in column
271 * major format set QMatTranspose=false, otherwise set to true if in row
272 * major format. The dimensions (in row major format) of QMat could be
273 * either a) (N x numberTopVectors) or b) (N x N) where
274 * numberTopVectors!=N. In this case it is assumed that Q is stored in the
275 * first numberTopVectors columns of QMat. The subspace rotation inside
276 * this function is done in a blocked approach which avoids creation of
277 * full serial rotation matrix memory, and also avoids creation of another
278 * full X memory. subspaceVectorsArrayLocalSize=N*M
279 *
280 */
281 template <typename T, typename TLowPrec>
282 void
284 const T *X,
285 const std::shared_ptr<
287 &BLASWrapperPtr,
288 T *Y,
289 const dftfe::uInt subspaceVectorsArrayLocalSize,
290 const dftfe::uInt N,
291 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
292 const dftfe::uInt numberTopVectors,
293 const MPI_Comm &interBandGroupComm,
294 const MPI_Comm &mpiComm,
295 const dftfe::ScaLAPACKMatrix<T> &QMat,
296 const dftParameters &dftParams,
297 const bool QMatTranspose = false);
298
299
300 /** @brief Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray
301 * stored in the column major format (N x M). Q is rotationMatPar (N x N).
302 *
303 * The subspace rotation inside this function is done in a blocked
304 * approach which avoids creation of full serial rotation matrix memory,
305 * and also avoids creation of another full subspaceVectorsArray memory.
306 * subspaceVectorsArrayLocalSize=N*M
307 *
308 */
309 template <typename T, typename TLowPrec>
310 void
312 T *subspaceVectorsArray,
313 const std::shared_ptr<
315 &BLASWrapperPtr,
316 const dftfe::uInt subspaceVectorsArrayLocalSize,
317 const dftfe::uInt N,
318 const std::shared_ptr<const dftfe::ProcessGrid> &processGrid,
319 const MPI_Comm &interBandGroupComm,
320 const MPI_Comm &mpiComm,
321 const dftfe::ScaLAPACKMatrix<T> &rotationMatPar,
322 const dftParameters &dftParams,
323 const bool rotationMatTranspose = false,
324 const bool doCommAfterBandParal = true);
325 } // namespace internal
326 } // namespace linearAlgebraOperations
327} // namespace dftfe
328#endif
Scalapack wrapper adapted from dealii library and extended implementation to complex datatype.
Definition scalapackWrapper.h:37
Namespace which declares the input parameters and the functions to parse them from the input paramete...
Definition dftParameters.h:36
Definition BLASWrapper.h:35
Contains internal functions used in linearAlgebraOperations.
Definition linearAlgebraOperationsInternal.h:39
void broadcastAcrossInterCommScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm, const dftfe::uInt broadcastRoot)
MPI_Bcast of ScaLAPACKMat across a given inter communicator from a given broadcast root....
void createProcessGridRectangularMatrix(const MPI_Comm &mpi_communicator, const dftfe::uInt sizeRows, const dftfe::uInt sizeColumns, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams)
Wrapper function to create a two dimensional processor grid for a rectangular matrix in dftfe::ScaLAP...
void createProcessGridSquareMatrix(const MPI_Comm &mpi_communicator, const dftfe::uInt size, std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftParameters &dftParams, const bool useOnlyThumbRule=false)
Wrapper function to create a two dimensional processor grid for a square matrix in dftfe::ScaLAPACKMa...
void scaleScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, dftfe::ScaLAPACKMatrix< T > &mat, const T scalar)
scale a ScaLAPACKMat with a scalar
void setupELPAHandleParameters(const MPI_Comm &mpi_communicator, MPI_Comm &processGridCommunicatorActive, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::uInt na, const dftfe::uInt nev, const dftfe::uInt blockSize, elpa_t &elpaHandle, const dftParameters &dftParams)
setup ELPA parameters.
void subspaceRotationMixedPrec(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dftfe::uInt subspaceVectorsArrayLocalSize, const dftfe::uInt N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void subspaceRotationSpectrumSplit(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const dftfe::uInt subspaceVectorsArrayLocalSize, const dftfe::uInt N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::uInt numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false)
Computes Y^{T}=Q*X^{T}.
void fillParallelOverlapMatrixMixedPrec(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dftfe::uInt XLocalSize, const dftfe::uInt numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams)
Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray sto...
void subspaceRotation(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dftfe::uInt subspaceVectorsArrayLocalSize, const dftfe::uInt N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool isRotationMatLowerTria=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void createGlobalToLocalIdMapsScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::ScaLAPACKMatrix< T > &mat, std::unordered_map< dftfe::uInt, dftfe::uInt > &globalToLocalRowIdMap, std::unordered_map< dftfe::uInt, dftfe::uInt > &globalToLocalColumnIdMap)
Creates global row/column id to local row/column ids for dftfe::ScaLAPACKMatrix.
void subspaceRotationCGSMixedPrec(T *subspaceVectorsArray, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dftfe::uInt subspaceVectorsArrayLocalSize, const dftfe::uInt N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &rotationMatPar, const dftParameters &dftParams, const bool rotationMatTranspose=false, const bool doCommAfterBandParal=true)
Computes X^{T}=Q*X^{T} inplace. X^{T} is the subspaceVectorsArray stored in the column major format (...
void subspaceRotationSpectrumSplitMixedPrec(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, T *Y, const dftfe::uInt subspaceVectorsArrayLocalSize, const dftfe::uInt N, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const dftfe::uInt numberTopVectors, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, const dftfe::ScaLAPACKMatrix< T > &QMat, const dftParameters &dftParams, const bool QMatTranspose=false)
Computes Y^{T}=Q*X^{T}.
void sumAcrossInterCommScaLAPACKMat(const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, dftfe::ScaLAPACKMatrix< T > &mat, const MPI_Comm &interComm)
Mpi all reduce of ScaLAPACKMat across a given inter communicator. Used for band parallelization.
void fillParallelOverlapMatrix(const T *X, const std::shared_ptr< dftfe::linearAlgebra::BLASWrapper< dftfe::utils::MemorySpace::HOST > > &BLASWrapperPtr, const dftfe::uInt XLocalSize, const dftfe::uInt numberVectors, const std::shared_ptr< const dftfe::ProcessGrid > &processGrid, const MPI_Comm &interBandGroupComm, const MPI_Comm &mpiComm, dftfe::ScaLAPACKMatrix< T > &overlapMatPar, const dftParameters &dftParams)
Computes Sc=X^{T}*Xc and stores in a parallel ScaLAPACK matrix. X^{T} is the subspaceVectorsArray sto...
Contains linear algebra functions used in the implementation of an eigen solver.
Definition linearAlgebraOperations.h:502
Definition pseudoPotentialToDftfeConverter.cc:34
std::uint32_t uInt
Definition TypeConfig.h:10