DFT-EFE
 
Loading...
Searching...
No Matches
BlasLapackKernels.h
Go to the documentation of this file.
1#ifndef dftefeKernels_h
2#define dftefeKernels_h
3
7#include <blas.hh>
8#include <vector>
9
10namespace dftefe
11{
12 namespace linearAlgebra
13 {
14 namespace blasLapack
15 {
19 template <typename ValueType1,
20 typename ValueType2,
23 {
24 public:
32 static void
33 ascale(size_type size,
34 ValueType1 alpha,
35 const ValueType2 * x,
37
45 static void
46 ascale(size_type size,
47 ValueType1 alpha,
48 const ValueType2 * x,
49 const ScalarOp & opalpha,
50 const ScalarOp & opx,
52
59 static void
61 const ValueType1 alpha,
62 const ValueType2 * x,
64 /*
65 * @brief Template for performing \f$ z_i = x_i * y_i \f$
66 * @param[in] size size of the array
67 * @param[in] x array
68 * @param[in] y array
69 * @param[out] z array
70 */
71 static void
73 const ValueType1 * x,
74 const ValueType2 * y,
76
77 // /*
78 // * @brief Template for performing \f$ blockedOutput_{ij} = blockedInput_{ij} * singleVectorInput_i \f$
79 // * @param[in] vecSize size of the blocked Input array
80 // * @param[in] numComponents number of components
81 // * @param[in] blockedInput blocked array
82 // * @param[in] singleVectorInput array
83 // * @param[out] blockedOutput blocked array
84 // */
85 // static void
86 // blockedHadamardProduct(const size_type vecSize,
87 // const size_type numComponents,
88 // const ValueType1 * blockedInput,
89 // const ValueType2 * singleVectorInput,
90 // scalar_type<ValueType1, ValueType2> *blockedOutput);
91
92 static void
94 const ValueType1 * x,
95 const ValueType2 * y,
96 const ScalarOp & opx,
97 const ScalarOp & opy,
99
127 static void
128 scaleStridedVarBatched(const size_type numMats,
129 const Layout layout,
130 const ScalarOp & scalarOpA,
131 const ScalarOp & scalarOpB,
132 const size_type * stridea,
133 const size_type * strideb,
134 const size_type * stridec,
135 const size_type * m,
136 const size_type * n,
137 const size_type * k,
138 const ValueType1 * dA,
139 const ValueType2 * dB,
142
167 static void
168 khatriRaoProduct(const Layout layout,
169 const size_type sizeI,
170 const size_type sizeJ,
171 const size_type sizeK,
172 const ValueType1 * A,
173 const ValueType2 * B,
175
200 static void
202 const Layout layout,
203 const size_type numMats,
204 const size_type * stridea,
205 const size_type * strideb,
206 const size_type * stridec,
207 const size_type * m,
208 const size_type * n,
209 const size_type * k,
210 const ValueType1 * dA,
211 const ValueType2 * dB,
214
239 static void
241 const size_type sizeI,
242 const size_type sizeJ,
243 const size_type sizeK,
244 const ValueType1 * A,
245 const ValueType2 * B,
247
257 static void
258 axpby(size_type size,
260 const ValueType1 * x,
262 const ValueType2 * y,
264
274 static void
275 axpbyBlocked(const size_type size,
276 const size_type blockSize,
279 const ValueType1 * x,
282 const ValueType2 * y,
284
303 static void
305 size_type numVec,
306 const ValueType1 * multiVecDataX,
307 const ValueType2 * multiVecDataY,
308 const ScalarOp & opX,
309 const ScalarOp & opY,
310 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
312 };
313
314 template <typename ValueType, dftefe::utils::MemorySpace memorySpace>
316 {
317 public:
327 static std::vector<double>
329 size_type numVec,
330 const ValueType *multiVecData);
331
341 static std::vector<double>
343 size_type numVec,
344 const ValueType * multiVecData,
346 };
347
348#ifdef DFTEFE_WITH_DEVICE
349 template <typename ValueType1, typename ValueType2>
350 class KernelsTwoValueTypes<ValueType1,
351 ValueType2,
353 {
354 public:
355 static void
356 ascale(size_type size,
357 ValueType1 alpha,
358 const ValueType2 * x,
360
361 /*
362 * @brief Template for performing \f$ z = 1 / x \f$, does not check if x[i] is zero
363 * @param[in] size size of the array
364 * @param[in] x array
365 * @param[out] z array
366 */
367 static void
369 const ValueType1 alpha,
370 const ValueType2 * x,
372
373 static void
375 const ValueType1 * x,
376 const ValueType2 * y,
378
379 static void
381 const ValueType1 * x,
382 const ValueType2 * y,
383 const ScalarOp & opx,
384 const ScalarOp & opy,
386
387
388 static void
389 khatriRaoProduct(const Layout layout,
390 const size_type sizeI,
391 const size_type sizeJ,
392 const size_type sizeK,
393 const ValueType1 * A,
394 const ValueType2 * B,
396
397 static void
398 axpby(size_type size,
400 const ValueType1 * x,
402 const ValueType2 * y,
404
405 static void
407 size_type vecSize,
408 size_type numVec,
409 const ValueType1 * multiVecDataX,
410 const ValueType2 * multiVecDataY,
411 const ScalarOp & opX,
412 const ScalarOp & opY,
413 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
415 };
416
417 template <typename ValueType>
418 class KernelsOneValueType<ValueType, dftefe::utils::MemorySpace::DEVICE>
419 {
420 public:
421 static std::vector<double>
423 size_type numVec,
424 const ValueType *multiVecData);
425
426
427 static std::vector<double>
429 size_type vecSize,
430 size_type numVec,
431 const ValueType * multiVecData,
433 };
434
435#endif
436
437 } // namespace blasLapack
438 } // namespace linearAlgebra
439} // namespace dftefe
440
441#endif // dftefeKernels_h
Definition: LinAlgOpContext.h:38
static std::vector< double > nrms2MultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData, LinAlgOpContext< memorySpace > &context)
Template for computing norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:622
static std::vector< double > amaxsMultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData)
Template for computing norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:595
namespace class for BlasLapack kernels not present in blaspp.
Definition: BlasLapackKernels.h:23
static void khatriRaoProductStridedVarBatched(const Layout layout, const size_type numMats, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing khatriRao but with variable stride In column major storage format: ,...
Definition: BlasLapackKernels.cpp:389
static void axpby(size_type size, scalar_type< ValueType1, ValueType2 > alpha, const ValueType1 *x, scalar_type< ValueType1, ValueType2 > beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z = \alpha x + \beta y \f$.
Definition: BlasLapackKernels.cpp:456
static void scaleStridedVarBatched(const size_type numMats, const Layout layout, const ScalarOp &scalarOpA, const ScalarOp &scalarOpB, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing hadamard product of two columns of batches of matrix A and B having num col A...
Definition: BlasLapackKernels.cpp:282
static void transposedKhatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format: , where is matrix, is ,...
Definition: BlasLapackKernels.cpp:424
static void hadamardProduct(size_type size, const ValueType1 *x, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z_i = x_i * y_i \f$.
Definition: BlasLapackKernels.cpp:184
static void khatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format: , where is matrix, is ,...
Definition: BlasLapackKernels.cpp:356
static void dotMultiVector(size_type vecSize, size_type numVec, const ValueType1 *multiVecDataX, const ValueType2 *multiVecDataY, const ScalarOp &opX, const ScalarOp &opY, scalar_type< ValueType1, ValueType2 > *multiVecDotProduct, LinAlgOpContext< memorySpace > &context)
Template for computing dot products numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:508
static void axpbyBlocked(const size_type size, const size_type blockSize, const scalar_type< ValueType1, ValueType2 > alpha1, const scalar_type< ValueType1, ValueType2 > *alpha, const ValueType1 *x, const scalar_type< ValueType1, ValueType2 > beta1, const scalar_type< ValueType1, ValueType2 > *beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z = \alpha_1 \alpha x + \beta_1 \beta y \f$.
Definition: BlasLapackKernels.cpp:477
static void ascale(size_type size, ValueType1 alpha, const ValueType2 *x, const ScalarOp &opalpha, const ScalarOp &opx, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z = \alpha x \f$.
Definition: BlasLapackKernels.cpp:91
blas::Layout Layout
Definition: BlasLapackTypedef.h:46
blas::scalar_type< ValueType1, ValueType2 > scalar_type
Definition: BlasLapackTypedef.h:70
void reciprocalX(size_type n, const ValueType1 alpha, ValueType2 const *x, scalar_type< ValueType1, ValueType2 > *y, LinAlgOpContext< memorySpace > &context)
Template for computing the multiplicative inverse of all the elements of x, does not check if any ele...
Definition: BlasLapack.t.cpp:83
ScalarOp
Definition: BlasLapackTypedef.h:56
MemorySpace
Definition: MemorySpaceType.h:37
dealii includes
Definition: AtomFieldDataSpherical.cpp:31
unsigned int size_type
Definition: TypeConfig.h:8