DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements
Loading...
Searching...
No Matches
scalapackWrapper.h
Go to the documentation of this file.
1// ---------------------------------------------------------------------
2//
3// Copyright (c) 2019-2020 The Regents of the University of Michigan and DFT-FE
4// authors.
5//
6// This file is part of the DFT-FE code.
7//
8// The DFT-FE code is free software; you can use it, redistribute
9// it, and/or modify it under the terms of the GNU Lesser General
10// Public License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12// The full text of the license can be found in the file LICENSE at
13// the top level of the DFT-FE distribution.
14//
15// ---------------------------------------------------------------------
16//
17
18
19
20#ifndef ScaLAPACKMatrix_H_
21#define ScaLAPACKMatrix_H_
22
23#include "headers.h"
24#include "process_grid.h"
25#include "lapack_support.h"
26
27
28namespace dftfe
29{
30 /**
31 * @brief Scalapack wrapper adapted from dealii library and extended implementation to complex datatype
32 *
33 * @author Sambit Das
34 */
35 template <typename NumberType>
37 {
38 public:
39 /**
40 * Declare the type for container size.
41 */
42 using size_type = unsigned int;
43
44 /**
45 * Constructor for a rectangular matrix with @p n_rows and @p n_cols
46 * and distributed using the grid @p process_grid.
47 *
48 * The parameters @p row_block_size and @p column_block_size are the block sizes used
49 * for the block-cyclic distribution of the matrix.
50 * In general, it is recommended to use powers of $2$, e.g. $16,32,64,
51 * \dots$.
52 */
54 const size_type n_rows,
55 const size_type n_columns,
56 const std::shared_ptr<const dftfe::ProcessGrid> &process_grid,
57 const size_type row_block_size = 32,
61
62 /**
63 * Constructor for a square matrix of size @p size, and distributed
64 * using the process grid in @p process_grid.
65 *
66 * The parameter @p block_size is used for the block-cyclic distribution of the matrix.
67 * An identical block size is used for the rows and columns of the matrix.
68 * In general, it is recommended to use powers of $2$, e.g. $16,32,64,
69 * \dots$.
70 */
72 const size_type size,
73 const std::shared_ptr<const dftfe::ProcessGrid> &process_grid,
74 const size_type block_size = 32,
77
78
79 /**
80 * Initialize the rectangular matrix with @p n_rows and @p n_cols
81 * and distributed using the grid @p process_grid.
82 *
83 * The parameters @p row_block_size and @p column_block_size are the block sizes used
84 * for the block-cyclic distribution of the matrix.
85 * In general, it is recommended to use powers of $2$, e.g. $16,32,64,
86 * \dots$.
87 */
88 void
90 const size_type n_columns,
91 const std::shared_ptr<const dftfe::ProcessGrid> &process_grid,
92 const size_type row_block_size = 32,
96
97 /**
98 * Initialize the square matrix of size @p size and distributed using the grid @p process_grid.
99 *
100 * The parameter @p block_size is used for the block-cyclic distribution of the matrix.
101 * An identical block size is used for the rows and columns of the matrix.
102 * In general, it is recommended to use powers of $2$, e.g. $16,32,64,
103 * \dots$.
104 */
105 void
106 reinit(const size_type size,
107 const std::shared_ptr<const dftfe::ProcessGrid> &process_grid,
108 const size_type block_size = 32,
111
112
113 /**
114 * Assign @p property to this matrix.
115 */
116 void
118
119 /**
120 * Return current @p property of this matrix
121 */
124
125 /**
126 * Return current @p state of this matrix
127 */
129 get_state() const;
130
131
132 /**
133 * Copy the contents of the distributed matrix into a differently distributed matrix @p dest.
134 * The function also works for matrices with different process grids
135 * or block-cyclic distributions.
136 */
137 void
139
140
141 /**
142 * Complex conjugate.
143 */
144 void
146
147
148 /**
149 * The operations based on the input parameter @p transpose_B and the
150 * alignment conditions are summarized in the following table:
151 *
152 * | transpose_B | Block Sizes | Operation |
153 * | :---------: | :--------------------------: | :-------------------------------------------: |
154 * | false | $MB_A=MB_B$ <br> $NB_A=NB_B$ | $\mathbf{A} = a \mathbf{A} + b \mathbf{B}$ |
155 * | true | $MB_A=NB_B$ <br> $NB_A=MB_B$ | $\mathbf{A} = a \mathbf{A} + b \mathbf{B}^T$ |
156 *
157 * The matrices $\mathbf{A}$ and $\mathbf{B}$ must have the same process
158 * grid.
159 */
160 void
162 const NumberType a = 0.,
163 const NumberType b = 1.,
164 const bool transpose_B = false);
165
166
167 /**
168 * The operations based on the input parameter @p conjugate_transpose_B and the
169 * alignment conditions are summarized in the following table:
170 *
171 * | transpose_B | Block Sizes | Operation |
172 * | :---------: | :--------------------------: | :-------------------------------------------: |
173 * | false | $MB_A=MB_B$ <br> $NB_A=NB_B$ | $\mathbf{A} = a \mathbf{A} + b \mathbf{B}$ |
174 * | true | $MB_A=NB_B$ <br> $NB_A=MB_B$ | $\mathbf{A} = a \mathbf{A} + b \mathbf{B}^C$ |
175 *
176 * The matrices $\mathbf{A}$ and $\mathbf{B}$ must have the same process
177 * grid.
178 */
179 void
181 const NumberType a = 0.,
182 const NumberType b = 1.,
183 const bool conjugate_transpose_B = false);
184
185 /**
186 * Transposing assignment: $\mathbf{A} = \mathbf{B}^T$
187 *
188 * The matrices $\mathbf{A}$ and $\mathbf{B}$ must have the same process
189 * grid.
190 *
191 * The following alignment conditions have to be fulfilled: $MB_A=NB_B$ and
192 * $NB_A=MB_B$.
193 */
194 void
196
197
198 /**
199 * Transposing assignment: $\mathbf{A} = \mathbf{B}^C$
200 *
201 * The matrices $\mathbf{A}$ and $\mathbf{B}$ must have the same process
202 * grid.
203 *
204 * The following alignment conditions have to be fulfilled: $MB_A=NB_B$ and
205 * $NB_A=MB_B$.
206 */
207 void
209
210 /**
211 * Matrix-matrix-multiplication:
212 *
213 * The operations based on the input parameters and the alignment conditions
214 * are summarized in the following table:
215 *
216 * | transpose_A | transpose_B | Block Sizes | Operation |
217 * | :---------: | :---------: | :-------------------------------------------: | :-------------------------------------------------------------: |
218 * | false | false | $MB_A=MB_C$ <br> $NB_A=MB_B$ <br> $NB_B=NB_C$ | $\mathbf{C} = b \mathbf{A} \cdot \mathbf{B} + c \mathbf{C}$ |
219 * | false | true | $MB_A=MB_C$ <br> $NB_A=NB_B$ <br> $MB_B=NB_C$ | $\mathbf{C} = b \mathbf{A} \cdot \mathbf{B}^T + c \mathbf{C}$ |
220 * | true | false | $MB_A=MB_B$ <br> $NB_A=MB_C$ <br> $NB_B=NB_C$ | $\mathbf{C} = b \mathbf{A}^T \cdot \mathbf{B} + c \mathbf{C}$ |
221 * | true | true | $MB_A=NB_B$ <br> $NB_A=MB_C$ <br> $MB_B=NB_C$ | $\mathbf{C} = b \mathbf{A}^T \cdot \mathbf{B}^T + c \mathbf{C}$ |
222 *
223 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
224 * and that
225 * $\mathbf{C}$ already has the right size.
226 *
227 * The matrices $\mathbf{A}$, $\mathbf{B}$ and $\mathbf{C}$ must have the
228 * same process grid.
229 */
230 void
231 mult(const NumberType b,
233 const NumberType c,
235 const bool transpose_A = false,
236 const bool transpose_B = false) const;
237
238
239 /**
240 * Matrix-matrix-multiplication:
241 *
242 * The operations based on the input parameters and the alignment conditions
243 * are summarized in the following table:
244 *
245 * | conjugate_transpose_A | conjugate_transpose_B | Block Sizes | Operation |
246 * | :---------: | :---------: | :-------------------------------------------: | :-------------------------------------------------------------: |
247 * | false | false | $MB_A=MB_C$ <br> $NB_A=MB_B$ <br> $NB_B=NB_C$ | $\mathbf{C} = b \mathbf{A} \cdot \mathbf{B} + c \mathbf{C}$ |
248 * | false | true | $MB_A=MB_C$ <br> $NB_A=NB_B$ <br> $MB_B=NB_C$ | $\mathbf{C} = b \mathbf{A} \cdot \mathbf{B}^C + c \mathbf{C}$ |
249 * | true | false | $MB_A=MB_B$ <br> $NB_A=MB_C$ <br> $NB_B=NB_C$ | $\mathbf{C} = b \mathbf{A}^C \cdot \mathbf{B} + c \mathbf{C}$ |
250 * | true | true | $MB_A=NB_B$ <br> $NB_A=MB_C$ <br> $MB_B=NB_C$ | $\mathbf{C} = b \mathbf{A}^C \cdot \mathbf{B}^C + c \mathbf{C}$ |
251 *
252 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
253 * and that
254 * $\mathbf{C}$ already has the right size.
255 *
256 * The matrices $\mathbf{A}$, $\mathbf{B}$ and $\mathbf{C}$ must have the
257 * same process grid.
258 */
259 void
260 zmult(const NumberType b,
262 const NumberType c,
264 const bool conjugate_transpose_A = false,
265 const bool conjugate_transpose_B = false) const;
266
267
268 /**
269 * Matrix-matrix-multiplication.
270 *
271 * The optional parameter @p adding determines whether the result is
272 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
273 *
274 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}$
275 *
276 * else $\mathbf{C} = \mathbf{A} \cdot \mathbf{B}$
277 *
278 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
279 * and that
280 * $\mathbf{C}$ already has the right size.
281 *
282 * The following alignment conditions have to be fulfilled: $MB_A=MB_C$,
283 * $NB_A=MB_B$ and $NB_B=NB_C$.
284 */
285 void
288 const bool adding = false) const;
289
290 /**
291 * Matrix-matrix-multiplication using transpose of $\mathbf{A}$.
292 *
293 * The optional parameter @p adding determines whether the result is
294 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
295 *
296 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A}^T \cdot \mathbf{B}$
297 *
298 * else $\mathbf{C} = \mathbf{A}^T \cdot \mathbf{B}$
299 *
300 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
301 * and that
302 * $\mathbf{C}$ already has the right size.
303 *
304 * The following alignment conditions have to be fulfilled: $MB_A=MB_B$,
305 * $NB_A=MB_C$ and $NB_B=NB_C$.
306 */
307 void
310 const bool adding = false) const;
311
312 /**
313 * Matrix-matrix-multiplication using the transpose of $\mathbf{B}$.
314 *
315 * The optional parameter @p adding determines whether the result is
316 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
317 *
318 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}^T$
319 *
320 * else $\mathbf{C} = \mathbf{A} \cdot \mathbf{B}^T$
321 *
322 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
323 * and that
324 * $\mathbf{C}$ already has the right size.
325 *
326 * The following alignment conditions have to be fulfilled: $MB_A=MB_C$,
327 * $NB_A=NB_B$ and $MB_B=NB_C$.
328 */
329 void
332 const bool adding = false) const;
333
334 /**
335 * Matrix-matrix-multiplication using transpose of $\mathbf{A}$ and
336 * $\mathbf{B}$.
337 *
338 * The optional parameter @p adding determines whether the result is
339 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
340 *
341 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A}^T \cdot
342 * \mathbf{B}^T$
343 *
344 * else $\mathbf{C} = \mathbf{A}^T \cdot \mathbf{B}^T$
345 *
346 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
347 * and that
348 * $\mathbf{C}$ already has the right size.
349 *
350 * The following alignment conditions have to be fulfilled: $MB_A=NB_B$,
351 * $NB_A=MB_C$ and $MB_B=NB_C$.
352 */
353 void
356 const bool adding = false) const;
357
358
359 /**
360 * Matrix-matrix-multiplication.
361 *
362 * The optional parameter @p adding determines whether the result is
363 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
364 *
365 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}$
366 *
367 * else $\mathbf{C} = \mathbf{A} \cdot \mathbf{B}$
368 *
369 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
370 * and that
371 * $\mathbf{C}$ already has the right size.
372 *
373 * The following alignment conditions have to be fulfilled: $MB_A=MB_C$,
374 * $NB_A=MB_B$ and $NB_B=NB_C$.
375 */
376 void
379 const bool adding = false) const;
380
381 /**
382 * Matrix-matrix-multiplication using conjugate transpose of $\mathbf{A}$.
383 *
384 * The optional parameter @p adding determines whether the result is
385 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
386 *
387 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A}^C \cdot \mathbf{B}$
388 *
389 * else $\mathbf{C} = \mathbf{A}^C \cdot \mathbf{B}$
390 *
391 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
392 * and that
393 * $\mathbf{C}$ already has the right size.
394 *
395 * The following alignment conditions have to be fulfilled: $MB_A=MB_B$,
396 * $NB_A=MB_C$ and $NB_B=NB_C$.
397 */
398 void
401 const bool adding = false) const;
402
403 /**
404 * Matrix-matrix-multiplication using the conjugate transpose of
405 * $\mathbf{B}$.
406 *
407 * The optional parameter @p adding determines whether the result is
408 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
409 *
410 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}^C$
411 *
412 * else $\mathbf{C} = \mathbf{A} \cdot \mathbf{B}^C$
413 *
414 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
415 * and that
416 * $\mathbf{C}$ already has the right size.
417 *
418 * The following alignment conditions have to be fulfilled: $MB_A=MB_C$,
419 * $NB_A=NB_B$ and $MB_B=NB_C$.
420 */
421 void
424 const bool adding = false) const;
425
426 /**
427 * Matrix-matrix-multiplication using conjugate transpose of $\mathbf{A}$
428 * and
429 * $\mathbf{B}$.
430 *
431 * The optional parameter @p adding determines whether the result is
432 * stored in $\mathbf{C}$ or added to $\mathbf{C}$.
433 *
434 * if (@p adding) $\mathbf{C} = \mathbf{C} + \mathbf{A}^C \cdot
435 * \mathbf{B}^T$
436 *
437 * else $\mathbf{C} = \mathbf{A}^C \cdot \mathbf{B}^C$
438 *
439 * It is assumed that $\mathbf{A}$ and $\mathbf{B}$ have compatible sizes
440 * and that
441 * $\mathbf{C}$ already has the right size.
442 *
443 * The following alignment conditions have to be fulfilled: $MB_A=NB_B$,
444 * $NB_A=MB_C$ and $MB_B=NB_C$.
445 */
446 void
449 const bool adding = false) const;
450
451
452 /**
453 * Number of rows of the $M \times N$ matrix.
454 */
456 m() const;
457
458 /**
459 * Number of columns of the $M \times N$ matrix.
460 */
462 n() const;
463
464 /**
465 * Number of local rows on this MPI processes.
466 */
467 unsigned int
468 local_m() const;
469
470 /**
471 * Number of local columns on this MPI process.
472 */
473 unsigned int
474 local_n() const;
475
476 /**
477 * Return the global row number for the given local row @p loc_row .
478 */
479 unsigned int
480 global_row(const unsigned int loc_row) const;
481
482 /**
483 * Return the global column number for the given local column @p loc_column.
484 */
485 unsigned int
486 global_column(const unsigned int loc_column) const;
487
488 /**
489 * Read access to local element.
490 */
491 NumberType
492 local_el(const unsigned int loc_row, const unsigned int loc_column) const;
493
494 /**
495 * Write access to local element.
496 */
497 NumberType &
498 local_el(const unsigned int loc_row, const unsigned int loc_column);
499
500 /**
501 * Compute the Cholesky factorization of the matrix using ScaLAPACK
502 * function <code>pXpotrf</code>. The result of the factorization is stored
503 * in this object.
504 */
505 void
507
508 /**
509 * Compute the LU factorization of the matrix using ScaLAPACK
510 * function <code>pXgetrf</code> and partial pivoting with row interchanges.
511 * The result of the factorization is stored in this object.
512 */
513 void
515
516 /**
517 * Invert the matrix by first computing a Cholesky for hermitian matrices
518 * or a LU factorization for general matrices and then
519 * building the actual inverse using <code>pXpotri</code> or
520 * <code>pXgetri</code>. If the matrix is triangular, the LU factorization
521 * step is skipped, and <code>pXtrtri</code> is used directly.
522 *
523 * If a Cholesky or LU factorization has been applied previously,
524 * <code>pXpotri</code> or <code>pXgetri</code> are called directly.
525 *
526 * The inverse is stored in this object.
527 */
528 void
530
531 /**
532 * Scale the columns of the distributed matrix by the scalars provided in the array @p factors.
533 *
534 * The array @p factors must have as many entries as the matrix columns.
535 *
536 * Copies of @p factors have to be available on all processes of the underlying MPI communicator.
537 */
538 void
539 scale_columns(const std::vector<NumberType> &factors);
540
541 /**
542 * Scale the rows of the distributed matrix by the scalars provided in the array @p factors.
543 *
544 * The array @p factors must have as many entries as the matrix rows.
545 *
546 * Copies of @p factors have to be available on all processes of the underlying MPI communicator.
547 */
548 void
549 scale_rows(const std::vector<NumberType> &factors);
550
551
552 /**
553 * Scale the columns of the distributed matrix by the scalars provided in the array @p factors.
554 *
555 * The array @p factors must have as many entries as the matrix columns.
556 *
557 * Copies of @p factors have to be available on all processes of the underlying MPI communicator.
558 */
559 void
560 scale_columns_realfactors(const std::vector<double> &factors);
561
562 /**
563 * Scale the rows of the distributed matrix by the scalars provided in the array @p factors.
564 *
565 * The array @p factors must have as many entries as the matrix rows.
566 *
567 * Copies of @p factors have to be available on all processes of the underlying MPI communicator.
568 */
569 void
570 scale_rows_realfactors(const std::vector<double> &factors);
571
572 /**
573 * Computing selected eigenvalues and, optionally, the eigenvectors of the
574 * real hermitian matrix $\mathbf{A} \in \mathbb{R}^{M \times M}$.
575 *
576 * The eigenvalues/eigenvectors are selected by prescribing a range of indices @p index_limits.
577 *
578 * If successful, the computed eigenvalues are arranged in ascending order.
579 * The eigenvectors are stored in the columns of the matrix, thereby
580 * overwriting the original content of the matrix.
581 *
582 * If all eigenvalues/eigenvectors have to be computed, pass the closed interval $ \left[ 0, M-1 \right] $ in @p index_limits.
583 *
584 * Pass the closed interval $ \left[ M-r, M-1 \right] $ if the $r$ largest
585 * eigenvalues/eigenvectors are desired.
586 */
587 std::vector<double>
589 const std::pair<unsigned int, unsigned int> &index_limits,
590 const bool compute_eigenvectors);
591
592 /**
593 * Computing selected eigenvalues and, optionally, the eigenvectors of the
594 * real hermitian matrix $\mathbf{A} \in \mathbb{R}^{M \times M}$ using the
595 * MRRR algorithm.
596 *
597 * The eigenvalues/eigenvectors are selected by prescribing a range of indices @p index_limits.
598 *
599 * If successful, the computed eigenvalues are arranged in ascending order.
600 * The eigenvectors are stored in the columns of the matrix, thereby
601 * overwriting the original content of the matrix.
602 *
603 * If all eigenvalues/eigenvectors have to be computed, pass the closed interval $ \left[ 0, M-1 \right] $ in @p index_limits.
604 *
605 * Pass the closed interval $ \left[ M-r, M-1 \right] $ if the $r$ largest
606 * eigenvalues/eigenvectors are desired.
607 */
608 std::vector<double>
610 const std::pair<unsigned int, unsigned int> &index_limits,
611 const bool compute_eigenvectors);
612
613 private:
614 /**
615 * Computing selected eigenvalues and, optionally, the eigenvectors.
616 * The eigenvalues/eigenvectors are selected by either prescribing a range of indices @p index_limits
617 * or a range of values @p value_limits for the eigenvalues. The function will throw an exception
618 * if both ranges are prescribed (meaning that both ranges differ from the
619 * default value) as this ambiguity is prohibited. If successful, the
620 * computed eigenvalues are arranged in ascending order. The eigenvectors
621 * are stored in the columns of the matrix, thereby overwriting the original
622 * content of the matrix.
623 */
624 std::vector<double>
626 const bool compute_eigenvectors,
627 const std::pair<unsigned int, unsigned int> &index_limits =
628 std::make_pair(dealii::numbers::invalid_unsigned_int,
629 dealii::numbers::invalid_unsigned_int),
630 const std::pair<double, double> &value_limits =
631 std::make_pair(std::numeric_limits<double>::quiet_NaN(),
632 std::numeric_limits<double>::quiet_NaN()));
633
634 /**
635 * Computing selected eigenvalues and, optionally, the eigenvectors of the
636 * real hermitian matrix $\mathbf{A} \in \mathbb{R}^{M \times M}$ using the
637 * MRRR algorithm.
638 * The eigenvalues/eigenvectors are selected by either prescribing a range of indices @p index_limits
639 * or a range of values @p value_limits for the eigenvalues. The function will throw an exception
640 * if both ranges are prescribed (meaning that both ranges differ from the
641 * default value) as this ambiguity is prohibited.
642 *
643 * By calling this function the original content of the matrix will be
644 * overwritten. If requested, the eigenvectors are stored in the columns of
645 * the matrix. Also in the case that just the eigenvalues are required, the
646 * content of the matrix will be overwritten.
647 *
648 * If successful, the computed eigenvalues are arranged in ascending order.
649 *
650 * @note Due to a bug in Netlib-ScaLAPACK, either all or no eigenvectors can be computed.
651 * Therefore, the input @p index_limits has to be set accordingly. Using Intel-MKL this restriction is not required.
652 */
653 std::vector<double>
655 const bool compute_eigenvectors,
656 const std::pair<unsigned int, unsigned int> &index_limits =
657 std::make_pair(dealii::numbers::invalid_unsigned_int,
658 dealii::numbers::invalid_unsigned_int),
659 const std::pair<double, double> &value_limits =
660 std::make_pair(std::numeric_limits<double>::quiet_NaN(),
661 std::numeric_limits<double>::quiet_NaN()));
662
663
664 /**
665 * local storage
666 */
667 std::vector<NumberType> values;
668
669 /**
670 * Since ScaLAPACK operations notoriously change the meaning of the matrix
671 * entries, we record the current state after the last operation here.
672 */
674
675 /**
676 * Additional property of the matrix which may help to select more
677 * efficient ScaLAPACK functions.
678 */
680
681 /**
682 * A shared pointer to a dealii::Utilities::MPI::ProcessGrid object which
683 * contains a BLACS context and a MPI communicator, as well as other
684 * necessary data structures.
685 */
686 std::shared_ptr<const dftfe::ProcessGrid> grid;
687
688 /**
689 * Number of rows in the matrix.
690 */
692
693 /**
694 * Number of columns in the matrix.
695 */
697
698 /**
699 * Row block size.
700 */
702
703 /**
704 * Column block size.
705 */
707
708 /**
709 * Number of rows in the matrix owned by the current process.
710 */
712
713 /**
714 * Number of columns in the matrix owned by the current process.
715 */
717
718 /**
719 * ScaLAPACK description vector.
720 */
722
723 /**
724 * Workspace array.
725 */
726 mutable std::vector<NumberType> work;
727
728 /**
729 * Integer workspace array.
730 */
731 mutable std::vector<int> iwork;
732
733 /**
734 * Integer array holding pivoting information required
735 * by ScaLAPACK's matrix factorization routines.
736 */
737 std::vector<int> ipiv;
738
739 /**
740 * A character to define where elements are stored in case
741 * ScaLAPACK operations support this.
742 */
743 const char uplo;
744
745 /**
746 * The process row of the process grid over which the first row
747 * of the global matrix is distributed.
748 */
750
751 /**
752 * The process column of the process grid over which the first column
753 * of the global matrix is distributed.
754 */
756
757 /**
758 * Global row index that determines where to start a submatrix.
759 * Currently this equals unity, as we don't use submatrices.
760 */
761 const int submatrix_row;
762
763 /**
764 * Global column index that determines where to start a submatrix.
765 * Currently this equals unity, as we don't use submatrices.
766 */
768
769 /**
770 * Thread mutex.
771 */
772 mutable dealii::Threads::Mutex mutex;
773 };
774
775 // ----------------------- inline functions ----------------------------
776
777#ifndef DOXYGEN
778
779 template <typename NumberType>
780 inline NumberType
781 ScaLAPACKMatrix<NumberType>::local_el(const unsigned int loc_row,
782 const unsigned int loc_column) const
783 {
784 return values[loc_column * n_local_rows + loc_row];
785 // return (*this)(loc_row, loc_column);
786 }
787
788
789
790 template <typename NumberType>
791 inline NumberType &
792 ScaLAPACKMatrix<NumberType>::local_el(const unsigned int loc_row,
793 const unsigned int loc_column)
794 {
795 return values[loc_column * n_local_rows + loc_row];
796 // return (*this)(loc_row, loc_column);
797 }
798
799
800 template <typename NumberType>
801 inline unsigned int
803 {
804 return n_rows;
805 }
806
807
808
809 template <typename NumberType>
810 inline unsigned int
812 {
813 return n_columns;
814 }
815
816
817
818 template <typename NumberType>
819 unsigned int
821 {
822 return n_local_rows;
823 }
824
825
826
827 template <typename NumberType>
828 unsigned int
833
834
835#endif // DOXYGEN
836
837
838} // namespace dftfe
839#endif // ScaLAPACKMatrix_H_
void copy_conjugate_transposed(const ScaLAPACKMatrix< NumberType > &B)
std::vector< NumberType > work
Definition scalapackWrapper.h:726
NumberType local_el(const unsigned int loc_row, const unsigned int loc_column) const
Definition scalapackWrapper.h:781
dftfe::LAPACKSupport::Property property
Definition scalapackWrapper.h:679
unsigned int local_n() const
Definition scalapackWrapper.h:829
void reinit(const size_type size, const std::shared_ptr< const dftfe::ProcessGrid > &process_grid, const size_type block_size=32, const dftfe::LAPACKSupport::Property property=dftfe::LAPACKSupport::Property::hermitian)
unsigned int global_column(const unsigned int loc_column) const
std::vector< NumberType > values
Definition scalapackWrapper.h:667
void reinit(const size_type n_rows, const size_type n_columns, const std::shared_ptr< const dftfe::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const dftfe::LAPACKSupport::Property property=dftfe::LAPACKSupport::Property::general)
dftfe::LAPACKSupport::State get_state() const
std::vector< int > ipiv
Definition scalapackWrapper.h:737
void mult(const NumberType b, const ScaLAPACKMatrix< NumberType > &B, const NumberType c, ScaLAPACKMatrix< NumberType > &C, const bool transpose_A=false, const bool transpose_B=false) const
std::shared_ptr< const dftfe::ProcessGrid > grid
Definition scalapackWrapper.h:686
void copy_to(ScaLAPACKMatrix< NumberType > &dest) const
void zmmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
unsigned int local_m() const
Definition scalapackWrapper.h:820
size_type n() const
Definition scalapackWrapper.h:811
void add(const ScaLAPACKMatrix< NumberType > &B, const NumberType a=0., const NumberType b=1., const bool transpose_B=false)
int n_rows
Definition scalapackWrapper.h:691
unsigned int size_type
Definition scalapackWrapper.h:42
std::vector< double > eigenpairs_hermitian_by_index_MRRR(const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors)
size_type m() const
Definition scalapackWrapper.h:802
void set_property(const dftfe::LAPACKSupport::Property property)
ScaLAPACKMatrix(const size_type size, const std::shared_ptr< const dftfe::ProcessGrid > &process_grid, const size_type block_size=32, const dftfe::LAPACKSupport::Property property=dftfe::LAPACKSupport::Property::hermitian)
dftfe::LAPACKSupport::Property get_property() const
void zCmmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
int n_columns
Definition scalapackWrapper.h:696
dealii::Threads::Mutex mutex
Definition scalapackWrapper.h:772
void zadd(const ScaLAPACKMatrix< NumberType > &B, const NumberType a=0., const NumberType b=1., const bool conjugate_transpose_B=false)
int n_local_columns
Definition scalapackWrapper.h:716
void compute_cholesky_factorization()
void Tmmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
void mTmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
std::vector< int > iwork
Definition scalapackWrapper.h:731
void zmCmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
ScaLAPACKMatrix(const size_type n_rows, const size_type n_columns, const std::shared_ptr< const dftfe::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const dftfe::LAPACKSupport::Property property=dftfe::LAPACKSupport::Property::general)
void zmult(const NumberType b, const ScaLAPACKMatrix< NumberType > &B, const NumberType c, ScaLAPACKMatrix< NumberType > &C, const bool conjugate_transpose_A=false, const bool conjugate_transpose_B=false) const
void scale_columns_realfactors(const std::vector< double > &factors)
const int submatrix_row
Definition scalapackWrapper.h:761
void scale_rows_realfactors(const std::vector< double > &factors)
dftfe::LAPACKSupport::State state
Definition scalapackWrapper.h:673
std::vector< double > eigenpairs_hermitian_by_index(const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors)
void scale_columns(const std::vector< NumberType > &factors)
void TmTmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
const int submatrix_column
Definition scalapackWrapper.h:767
int column_block_size
Definition scalapackWrapper.h:706
unsigned int global_row(const unsigned int loc_row) const
std::vector< double > eigenpairs_hermitian_MRRR(const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(dealii::numbers::invalid_unsigned_int, dealii::numbers::invalid_unsigned_int), const std::pair< double, double > &value_limits=std::make_pair(std::numeric_limits< double >::quiet_NaN(), std::numeric_limits< double >::quiet_NaN()))
void copy_transposed(const ScaLAPACKMatrix< NumberType > &B)
void mmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
int n_local_rows
Definition scalapackWrapper.h:711
const int first_process_column
Definition scalapackWrapper.h:755
int row_block_size
Definition scalapackWrapper.h:701
int descriptor[9]
Definition scalapackWrapper.h:721
const char uplo
Definition scalapackWrapper.h:743
void zCmCmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
const int first_process_row
Definition scalapackWrapper.h:749
void scale_rows(const std::vector< NumberType > &factors)
std::vector< double > eigenpairs_hermitian(const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(dealii::numbers::invalid_unsigned_int, dealii::numbers::invalid_unsigned_int), const std::pair< double, double > &value_limits=std::make_pair(std::numeric_limits< double >::quiet_NaN(), std::numeric_limits< double >::quiet_NaN()))
State
Definition lapack_support.h:54
Property
Definition lapack_support.h:107
@ general
No special properties.
Definition lapack_support.h:109
@ hermitian
Matrix is symmetric.
Definition lapack_support.h:111
Definition pseudoPotentialToDftfeConverter.cc:34