DFT-EFE
 
Loading...
Searching...
No Matches
BlasLapackKernels.h
Go to the documentation of this file.
1#ifndef dftefeKernels_h
2#define dftefeKernels_h
3
7#include <blas.hh>
8#include <vector>
9
10namespace dftefe
11{
12 namespace linearAlgebra
13 {
14 namespace blasLapack
15 {
19 template <typename ValueType1,
20 typename ValueType2,
23 {
24 public:
32 static void
33 ascale(size_type size,
34 ValueType1 alpha,
35 const ValueType2 * x,
37
44 static void
46 const ValueType1 alpha,
47 const ValueType2 * x,
49 /**
50 * @brief Template for performing the element-wise (Hadamard) product \f$ z_i = x_i * y_i \f$
51 * @param[in] size size of the array
52 * @param[in] x array
53 * @param[in] y array
54 * @param[out] z array
55 */
56 static void
58 const ValueType1 * x,
59 const ValueType2 * y,
61
62 // /*
63 // * @brief Template for performing \f$ blockedOutput_{ij} = blockedInput_{ij} * singleVectorInput_i \f$
64 // * @param[in] size size of the blocked Input array
65 // * @param[in] numComponents number of components
66 // * @param[in] blockedInput blocked array
67 // * @param[in] singleVectorInput array
68 // * @param[out] blockedOutput blocked array
69 // */
70 // static void
71 // blockedHadamardProduct(const size_type vecSize,
72 // const size_type numComponents,
73 // const ValueType1 * blockedInput,
74 // const ValueType2 * singleVectorInput,
75 // scalar_type<ValueType1, ValueType2> *blockedOutput);
76
77 static void
79 const ValueType1 * x,
80 const ValueType2 * y,
81 const ScalarOp & opx,
82 const ScalarOp & opy,
84
110 static void
111 scaleStridedVarBatched(const size_type numMats,
112 const ScalarOp * scalarOpA,
113 const ScalarOp * scalarOpB,
114 const size_type * stridea,
115 const size_type * strideb,
116 const size_type * stridec,
117 const size_type * m,
118 const size_type * n,
119 const size_type * k,
120 const ValueType1 * dA,
121 const ValueType2 * dB,
124
149 static void
150 khatriRaoProduct(const Layout layout,
151 const size_type sizeI,
152 const size_type sizeJ,
153 const size_type sizeK,
154 const ValueType1 * A,
155 const ValueType2 * B,
157
182 static void
184 const size_type sizeI,
185 const size_type sizeJ,
186 const size_type sizeK,
187 const ValueType1 * A,
188 const ValueType2 * B,
190
200 static void
201 axpby(size_type size,
203 const ValueType1 * x,
205 const ValueType2 * y,
207
217 static void
218 axpbyBlocked(const size_type size,
219 const size_type blockSize,
221 const ValueType1 * x,
223 const ValueType2 * y,
225
244 static void
246 size_type numVec,
247 const ValueType1 * multiVecDataX,
248 const ValueType2 * multiVecDataY,
249 const ScalarOp & opX,
250 const ScalarOp & opY,
251 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
253 };
254
255 template <typename ValueType, dftefe::utils::MemorySpace memorySpace>
257 {
258 public:
268 static std::vector<double>
270 size_type numVec,
271 const ValueType *multiVecData);
272
282 static std::vector<double>
284 size_type numVec,
285 const ValueType * multiVecData,
287 };
288
289#ifdef DFTEFE_WITH_DEVICE
290 template <typename ValueType1, typename ValueType2>
291 class KernelsTwoValueTypes<ValueType1,
292 ValueType2,
294 {
295 public:
296 static void
297 ascale(size_type size,
298 ValueType1 alpha,
299 const ValueType2 * x,
301
302 /*
303 * @brief Template for performing \f$ z = 1 / x \f$, does not check if x[i] is zero
304 * @param[in] size size of the array
305 * @param[in] x array
306 * @param[out] z array
307 */
308 static void
310 const ValueType1 alpha,
311 const ValueType2 * x,
313
314 static void
316 const ValueType1 * x,
317 const ValueType2 * y,
319
320 static void
322 const ValueType1 * x,
323 const ValueType2 * y,
324 const ScalarOp & opx,
325 const ScalarOp & opy,
327
328
329 static void
330 khatriRaoProduct(const Layout layout,
331 const size_type sizeI,
332 const size_type sizeJ,
333 const size_type sizeK,
334 const ValueType1 * A,
335 const ValueType2 * B,
337
338 static void
339 axpby(size_type size,
341 const ValueType1 * x,
343 const ValueType2 * y,
345
346 static void
348 size_type vecSize,
349 size_type numVec,
350 const ValueType1 * multiVecDataX,
351 const ValueType2 * multiVecDataY,
352 const ScalarOp & opX,
353 const ScalarOp & opY,
354 scalar_type<ValueType1, ValueType2> *multiVecDotProduct,
356 };
357
358 template <typename ValueType>
359 class KernelsOneValueType<ValueType, dftefe::utils::MemorySpace::DEVICE>
360 {
361 public:
362 static std::vector<double>
364 size_type numVec,
365 const ValueType *multiVecData);
366
367
368 static std::vector<double>
370 size_type vecSize,
371 size_type numVec,
372 const ValueType * multiVecData,
374 };
375
376#endif
377
378 } // namespace blasLapack
379 } // namespace linearAlgebra
380} // namespace dftefe
381
382#endif // dftefeKernels_h
Definition: LinAlgOpContext.h:38
static std::vector< double > nrms2MultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData, LinAlgOpContext< memorySpace > &context)
Template for computing norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:495
static std::vector< double > amaxsMultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData)
Template for computing norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:468
namespace class for BlasLapack kernels not present in blaspp.
Definition: BlasLapackKernels.h:23
static void axpby(size_type size, scalar_type< ValueType1, ValueType2 > alpha, const ValueType1 *x, scalar_type< ValueType1, ValueType2 > beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z = \alpha x + \beta y \f$.
Definition: BlasLapackKernels.cpp:333
static void reciprocalX(size_type size, const ValueType1 alpha, const ValueType2 *x, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z_i = \alpha / x_i \f$, does not check if x[i] is zero.
Definition: BlasLapackKernels.cpp:109
static void transposedKhatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format: , where is matrix, is ,...
Definition: BlasLapackKernels.cpp:301
static void hadamardProduct(size_type size, const ValueType1 *x, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Definition: BlasLapackKernels.cpp:127
static void axpbyBlocked(const size_type size, const size_type blockSize, const scalar_type< ValueType1, ValueType2 > *alpha, const ValueType1 *x, const scalar_type< ValueType1, ValueType2 > *beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing \f$ z = \alpha x + \beta y \f$ block-wise, with alpha and beta supplied as arrays of scalars.
Definition: BlasLapackKernels.cpp:354
static void khatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
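Template for performing, in column major storage format: \f$ {\bf Z} = {\bf A} \odot {\bf B} \f$ (Khatri-Rao product), where \f$ {\bf A} \f$ is an \f$ I \times K \f$ matrix, \f$ {\bf B} \f$ is \f$ J \times K \f$,...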
Definition: BlasLapackKernels.cpp:268
static void dotMultiVector(size_type vecSize, size_type numVec, const ValueType1 *multiVecDataX, const ValueType2 *multiVecDataY, const ScalarOp &opX, const ScalarOp &opY, scalar_type< ValueType1, ValueType2 > *multiVecDotProduct, LinAlgOpContext< memorySpace > &context)
Template for computing dot products numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:381
static void scaleStridedVarBatched(const size_type numMats, const ScalarOp *scalarOpA, const ScalarOp *scalarOpB, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing hadamard product of two columns of batches of matrix A and B having num col A...
Definition: BlasLapackKernels.cpp:225
blas::Layout Layout
Definition: BlasLapackTypedef.h:46
blas::scalar_type< ValueType1, ValueType2 > scalar_type
Definition: BlasLapackTypedef.h:70
ScalarOp
Definition: BlasLapackTypedef.h:56
void ascale(const size_type n, const ValueType1 alpha, const ValueType2 *x, scalar_type< ValueType1, ValueType2 > *z, LinAlgOpContext< memorySpace > &context)
Definition: BlasLapack.t.cpp:97
MemorySpace
Definition: MemorySpaceType.h:37
dealii includes
Definition: AtomFieldDataSpherical.cpp:31
unsigned int size_type
Definition: TypeConfig.h:8