DFT-EFE
 
Loading...
Searching...
No Matches
BlasLapackKernels.h
Go to the documentation of this file.
1#ifndef dftefeKernels_h
2#define dftefeKernels_h
3
7#include <vector>
8
9namespace dftefe
10{
11 namespace linearAlgebra
12 {
13 namespace blasLapack
14 {
18 template <typename ValueType1,
19 typename ValueType2,
22 {
23 public:
31 static void
32 ascale(size_type size,
33 ValueType1 alpha,
34 const ValueType2 * x,
36
44 static void
45 ascale(size_type size,
46 ValueType1 alpha,
47 const ValueType2 * x,
48 const ScalarOp & opalpha,
49 const ScalarOp & opx,
51
58 static void
60 const ValueType1 alpha,
61 const ValueType2 * x,
63 /*
64 * @brief Template for performing \f$ z_i = x_i * y_i \f$
65 * @param[in] size size of the array
66 * @param[in] x array
67 * @param[in] y array
68 * @param[out] z array
69 */
70 static void
72 const ValueType1 * x,
73 const ValueType2 * y,
75
76 // /*
77 // * @brief Template for performing \f$ blockedOutput_{ij} = blockedInput_{ij} * singleVectorInput_i \f$
78 // * @param[in] size size of the blocked Input array
79 // * @param[in] numComponents number of components
80 // * @param[in] blockedInput blocked array
81 // * @param[in] singleVectorInput array
82 // * @param[out] blockedOutput blocked array
83 // */
84 // static void
85 // blockedHadamardProduct(const size_type vecSize,
86 // const size_type numComponents,
87 // const ValueType1 * blockedInput,
88 // const ValueType2 * singleVectorInput,
89 // scalar_type<ValueType1, ValueType2> *blockedOutput);
90
91 static void
93 const ValueType1 * x,
94 const ValueType2 * y,
95 const ScalarOp & opx,
96 const ScalarOp & opy,
98
126 static void
127 scaleStridedVarBatched(const size_type numMats,
128 const Layout layout,
129 const ScalarOp & scalarOpA,
130 const ScalarOp & scalarOpB,
131 const size_type * stridea,
132 const size_type * strideb,
133 const size_type * stridec,
134 const size_type * m,
135 const size_type * n,
136 const size_type * k,
137 const ValueType1 * dA,
138 const ValueType2 * dB,
141
166 static void
167 khatriRaoProduct(const Layout layout,
168 const size_type sizeI,
169 const size_type sizeJ,
170 const size_type sizeK,
171 const ValueType1 * A,
172 const ValueType2 * B,
174
199 static void
201 const Layout layout,
202 const size_type numMats,
203 const size_type * stridea,
204 const size_type * strideb,
205 const size_type * stridec,
206 const size_type * m,
207 const size_type * n,
208 const size_type * k,
209 const ValueType1 * dA,
210 const ValueType2 * dB,
213
238 static void
240 const size_type sizeI,
241 const size_type sizeJ,
242 const size_type sizeK,
243 const ValueType1 * A,
244 const ValueType2 * B,
246
256 static void
257 axpby(size_type size,
259 const ValueType1 * x,
261 const ValueType2 * y,
263
273 static void
274 axpbyBlocked(const size_type size,
275 const size_type blockSize,
278 const ValueType1 * x,
281 const ValueType2 * y,
283
302 static void
304 size_type numVec,
305 const ValueType1 * multiVecDataX,
306 const ValueType2 * multiVecDataY,
307 const ScalarOp & opX,
308 const ScalarOp & opY,
309 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
311 };
312
313 template <typename ValueType, dftefe::utils::MemorySpace memorySpace>
315 {
316 public:
326 static std::vector<double>
328 size_type numVec,
329 const ValueType *multiVecData);
330
340 static std::vector<double>
342 size_type numVec,
343 const ValueType * multiVecData,
345 };
346
347#ifdef DFTEFE_WITH_DEVICE
348 template <typename ValueType1, typename ValueType2>
349 class KernelsTwoValueTypes<ValueType1,
350 ValueType2,
352 {
353 public:
354 static void
355 ascale(size_type size,
356 ValueType1 alpha,
357 const ValueType2 * x,
359
360 /*
361 * @brief Template for performing \f$ z = 1 / x \f$, does not check if x[i] is zero
362 * @param[in] size size of the array
363 * @param[in] x array
364 * @param[out] z array
365 */
366 static void
368 const ValueType1 alpha,
369 const ValueType2 * x,
371
372 static void
374 const ValueType1 * x,
375 const ValueType2 * y,
377
378 static void
380 const ValueType1 * x,
381 const ValueType2 * y,
382 const ScalarOp & opx,
383 const ScalarOp & opy,
385
386
387 static void
388 khatriRaoProduct(const Layout layout,
389 const size_type sizeI,
390 const size_type sizeJ,
391 const size_type sizeK,
392 const ValueType1 * A,
393 const ValueType2 * B,
395
396 static void
397 axpby(size_type size,
399 const ValueType1 * x,
401 const ValueType2 * y,
403
404 static void
406 size_type vecSize,
407 size_type numVec,
408 const ValueType1 * multiVecDataX,
409 const ValueType2 * multiVecDataY,
410 const ScalarOp & opX,
411 const ScalarOp & opY,
412 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
414 };
415
416 template <typename ValueType>
417 class KernelsOneValueType<ValueType, dftefe::utils::MemorySpace::DEVICE>
418 {
419 public:
420 static std::vector<double>
422 size_type numVec,
423 const ValueType *multiVecData);
424
425
426 static std::vector<double>
428 size_type vecSize,
429 size_type numVec,
430 const ValueType * multiVecData,
432 };
433
434#endif
435
436 } // namespace blasLapack
437 } // namespace linearAlgebra
438} // namespace dftefe
439
440#endif // dftefeKernels_h
Definition: LinAlgOpContext.h:38
static std::vector< double > nrms2MultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData, LinAlgOpContext< memorySpace > &context)
Template for computing $l_2$ norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:622
static std::vector< double > amaxsMultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData)
Template for computing $l_{\infty}$ norms (maximum absolute values) of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:595
namespace class for BlasLapack kernels not present in blaspp.
Definition: BlasLapackKernels.h:22
static void khatriRaoProductStridedVarBatched(const Layout layout, const size_type numMats, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing khatriRao but with variable stride In column major storage format: ,...
Definition: BlasLapackKernels.cpp:389
static void axpby(size_type size, scalar_type< ValueType1, ValueType2 > alpha, const ValueType1 *x, scalar_type< ValueType1, ValueType2 > beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z = \alpha x + \beta y$.
Definition: BlasLapackKernels.cpp:456
static void scaleStridedVarBatched(const size_type numMats, const Layout layout, const ScalarOp &scalarOpA, const ScalarOp &scalarOpB, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing hadamard product of two columns of batches of matrix A and B having num col A...
Definition: BlasLapackKernels.cpp:282
static void transposedKhatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format: , where is matrix, is ,...
Definition: BlasLapackKernels.cpp:424
static void hadamardProduct(size_type size, const ValueType1 *x, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z_i = x_i * y_i$.
Definition: BlasLapackKernels.cpp:184
static void khatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format: , where is matrix, is ,...
Definition: BlasLapackKernels.cpp:356
static void dotMultiVector(size_type vecSize, size_type numVec, const ValueType1 *multiVecDataX, const ValueType2 *multiVecDataY, const ScalarOp &opX, const ScalarOp &opY, scalar_type< ValueType1, ValueType2 > *multiVecDotProduct, LinAlgOpContext< memorySpace > &context)
Template for computing dot products of numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:508
static void axpbyBlocked(const size_type size, const size_type blockSize, const scalar_type< ValueType1, ValueType2 > alpha1, const scalar_type< ValueType1, ValueType2 > *alpha, const ValueType1 *x, const scalar_type< ValueType1, ValueType2 > beta1, const scalar_type< ValueType1, ValueType2 > *beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z = \alpha_1 \alpha x + \beta_1 \beta y$.
Definition: BlasLapackKernels.cpp:477
static void ascale(size_type size, ValueType1 alpha, const ValueType2 *x, const ScalarOp &opalpha, const ScalarOp &opx, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z = \alpha x$.
Definition: BlasLapackKernels.cpp:91
Layout
Definition: BlasLapackTypedef.h:161
typeInternal::scalar_type< ValueType1, ValueType2 > scalar_type
Definition: BlasLapackTypedef.h:183
void reciprocalX(size_type n, const ValueType1 alpha, ValueType2 const *x, scalar_type< ValueType1, ValueType2 > *y, LinAlgOpContext< memorySpace > &context)
Template for computing the multiplicative inverse of all the elements of x, does not check if any ele...
Definition: BlasLapack.t.cpp:78
ScalarOp
Definition: BlasLapackTypedef.h:169
MemorySpace
Definition: MemorySpaceType.h:37
dealii includes
Definition: AtomFieldDataSpherical.cpp:31
unsigned int size_type
Definition: TypeConfig.h:8