dft-efe/BlasLapackKernels_8h_source.html

#ifndef dftefeKernels_h

#define dftefeKernels_h


#include <utils/MemoryManager.h>

#include <linearAlgebra/BlasLapackTypedef.h>

#include <linearAlgebra/LinAlgOpContext.h>

#include <blas.hh>

#include <vector>


namespace dftefe

{

  namespace linearAlgebra

  {

    namespace blasLapack

    {

      /**
       * @brief Namespace-like class (static members only) grouping BlasLapack
       * kernels that are not present in blaspp and that combine two,
       * possibly different, value types.
       *
       * Outputs are typed scalar_type<ValueType1, ValueType2>.
       * Only declarations live here; the definitions are in a separate
       * translation unit that is not visible from this header.
       *
       * @tparam ValueType1 value type of the first operand
       * @tparam ValueType2 value type of the second operand
       * @tparam memorySpace memory space the kernels are declared for; it also
       * selects the LinAlgOpContext type taken by the context-aware members
       */
      template <typename ValueType1,

                typename ValueType2,

                dftefe::utils::MemorySpace memorySpace>

      class KernelsTwoValueTypes

      {

      public:

        /**
         * @brief Scale an array by a scalar, \f$ z_i = \alpha x_i \f$.
         * NOTE(review): formula taken from the project docs of the ScalarOp
         * overload; confirm against the definition.
         * @param[in] size number of entries in x and z
         * @param[in] alpha scalar factor
         * @param[in] x input array
         * @param[out] z output array
         */
        static void

        ascale(size_type                            size,

               ValueType1                           alpha,

               const ValueType2 *                   x,

               scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Scale an array by a scalar, \f$ z = \alpha x \f$, with a
         * ScalarOp supplied for each operand.
         * @param[in] size number of entries in x and z
         * @param[in] alpha scalar factor
         * @param[in] x input array
         * @param[in] opalpha scalar operation for alpha (presumably applied to
         * alpha before the product; TODO confirm)
         * @param[in] opx scalar operation for x (presumably applied to each
         * x_i before the product; TODO confirm)
         * @param[out] z output array
         */
        static void

        ascale(size_type                            size,

               ValueType1                           alpha,

               const ValueType2 *                   x,

               const ScalarOp &                     opalpha,

               const ScalarOp &                     opx,

               scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Element-wise multiplicative inverse of x written to z.
         * NOTE(review): the device specialization's docs describe this as
         * z = 1/x without a zero check; alpha presumably scales the result
         * (z_i = alpha / x_i). Confirm against the definition.
         * @param[in] size number of entries in x and z
         * @param[in] alpha scalar factor
         * @param[in] x input array
         * @param[out] z output array
         */
        static void

        reciprocalX(size_type                            size,

                    const ValueType1                     alpha,

                    const ValueType2 *                   x,

                    scalar_type<ValueType1, ValueType2> *z);

        /**
         * @brief Template for performing \f$ z_i = x_i * y_i \f$
         * @param[in] size size of the array
         * @param[in] x array
         * @param[in] y array
         * @param[out] z array
         */

        static void

        hadamardProduct(size_type                            size,

                        const ValueType1 *                   x,

                        const ValueType2 *                   y,

                        scalar_type<ValueType1, ValueType2> *z);


        // Disabled declaration, kept for reference only (not part of the API):
        //
        // /*
        //  * @brief Template for performing
        //  *   blockedOutput_ij = blockedInput_ij * singleVectorInput_i
        //  * @param[in] vecSize size of the blocked input array
        //  * @param[in] numComponents number of components
        //  * @param[in] blockedInput blocked array
        //  * @param[in] singleVectorInput array
        //  * @param[out] blockedOutput blocked array
        //  */
        // static void
        // blockedHadamardProduct(const size_type                      vecSize,
        //                 const size_type                      numComponents,
        //                 const ValueType1 *                   blockedInput,
        //                 const ValueType2 * singleVectorInput,
        //                 scalar_type<ValueType1, ValueType2> *blockedOutput);


        /**
         * @brief Element-wise product of x and y with a ScalarOp supplied for
         * each operand.
         * @param[in] size size of the arrays
         * @param[in] x array
         * @param[in] y array
         * @param[in] opx scalar operation for x (presumably applied to each
         * x_i before the product; TODO confirm)
         * @param[in] opy scalar operation for y (presumably applied to each
         * y_i before the product; TODO confirm)
         * @param[out] z array
         */
        static void

        hadamardProduct(size_type                            size,

                        const ValueType1 *                   x,

                        const ValueType2 *                   y,

                        const ScalarOp &                     opx,

                        const ScalarOp &                     opy,

                        scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Batched kernel described in the project docs as a Hadamard
         * product of columns of batches of matrices A and B.
         * NOTE(review): the exact formula and the roles of m, n, k and the
         * strides are not visible from this header; confirm against the
         * definition before relying on the descriptions below.
         * @param[in] numMats number of matrices in the batch
         * @param[in] layout storage layout of the matrices
         * @param[in] scalarOpA scalar operation for the entries of dA
         * @param[in] scalarOpB scalar operation for the entries of dB
         * @param[in] stridea array of strides for dA (presumably one entry per
         * batch member)
         * @param[in] strideb array of strides for dB (presumably one entry per
         * batch member)
         * @param[in] stridec array of strides for dC (presumably one entry per
         * batch member)
         * @param[in] m array of sizes (presumably one entry per batch member)
         * @param[in] n array of sizes (presumably one entry per batch member)
         * @param[in] k array of sizes (presumably one entry per batch member)
         * @param[in] dA input data of the A matrices
         * @param[in] dB input data of the B matrices
         * @param[out] dC output data
         * @param[in] context linear algebra operation context for memorySpace
         */
        static void

        scaleStridedVarBatched(const size_type                      numMats,

                               const Layout                         layout,

                               const ScalarOp &                     scalarOpA,

                               const ScalarOp &                     scalarOpB,

                               const size_type *                    stridea,

                               const size_type *                    strideb,

                               const size_type *                    stridec,

                               const size_type *                    m,

                               const size_type *                    n,

                               const size_type *                    k,

                               const ValueType1 *                   dA,

                               const ValueType2 *                   dB,

                               scalar_type<ValueType1, ValueType2> *dC,

                               LinAlgOpContext<memorySpace> &       context);


        /**
         * @brief Khatri-Rao product of A and B written to Z.
         * NOTE(review): the matrix shapes in terms of sizeI, sizeJ, sizeK and
         * the index formula are not visible from this header; confirm against
         * the definition.
         * @param[in] layout storage layout of the matrices
         * @param[in] sizeI first extent
         * @param[in] sizeJ second extent
         * @param[in] sizeK third extent
         * @param[in] A first input matrix
         * @param[in] B second input matrix
         * @param[out] Z output matrix
         */
        static void

        khatriRaoProduct(const Layout                         layout,

                         const size_type                      sizeI,

                         const size_type                      sizeJ,

                         const size_type                      sizeK,

                         const ValueType1 *                   A,

                         const ValueType2 *                   B,

                         scalar_type<ValueType1, ValueType2> *Z);


        /**
         * @brief Khatri-Rao product over a batch of matrices with variable
         * strides.
         * NOTE(review): the roles of m, n, k and the strides are not visible
         * from this header; confirm against the definition.
         * @param[in] layout storage layout of the matrices
         * @param[in] numMats number of matrices in the batch
         * @param[in] stridea array of strides for dA (presumably one entry per
         * batch member)
         * @param[in] strideb array of strides for dB (presumably one entry per
         * batch member)
         * @param[in] stridec array of strides for dC (presumably one entry per
         * batch member)
         * @param[in] m array of sizes (presumably one entry per batch member)
         * @param[in] n array of sizes (presumably one entry per batch member)
         * @param[in] k array of sizes (presumably one entry per batch member)
         * @param[in] dA input data of the A matrices
         * @param[in] dB input data of the B matrices
         * @param[out] dC output data
         * @param[in] context linear algebra operation context for memorySpace
         */
        static void

        khatriRaoProductStridedVarBatched(

          const Layout                         layout,

          const size_type                      numMats,

          const size_type *                    stridea,

          const size_type *                    strideb,

          const size_type *                    stridec,

          const size_type *                    m,

          const size_type *                    n,

          const size_type *                    k,

          const ValueType1 *                   dA,

          const ValueType2 *                   dB,

          scalar_type<ValueType1, ValueType2> *dC,

          LinAlgOpContext<memorySpace> &       context);


        /**
         * @brief Transposed variant of khatriRaoProduct.
         * NOTE(review): how the result differs from khatriRaoProduct (which
         * operand or output is transposed) is not visible from this header;
         * confirm against the definition.
         * @param[in] layout storage layout of the matrices
         * @param[in] sizeI first extent
         * @param[in] sizeJ second extent
         * @param[in] sizeK third extent
         * @param[in] A first input matrix
         * @param[in] B second input matrix
         * @param[out] Z output matrix
         */
        static void

        transposedKhatriRaoProduct(const Layout                         layout,

                                   const size_type                      sizeI,

                                   const size_type                      sizeJ,

                                   const size_type                      sizeK,

                                   const ValueType1 *                   A,

                                   const ValueType2 *                   B,

                                   scalar_type<ValueType1, ValueType2> *Z);


        /**
         * @brief Linear combination of two arrays, presumably
         * \f$ z = \alpha x + \beta y \f$ (inferred from the name and the
         * parameter list; TODO confirm against the definition).
         * @param[in] size number of entries in x, y and z
         * @param[in] alpha scalar paired with x
         * @param[in] x first input array
         * @param[in] beta scalar paired with y
         * @param[in] y second input array
         * @param[out] z output array
         */
        static void

        axpby(size_type                            size,

              scalar_type<ValueType1, ValueType2>  alpha,

              const ValueType1 *                   x,

              scalar_type<ValueType1, ValueType2>  beta,

              const ValueType2 *                   y,

              scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Blocked variant of axpby taking both a single scalar and an
         * array of scalars for each operand.
         * NOTE(review): how alpha1/alpha and beta1/beta are combined, and
         * whether alpha/beta hold one entry per block component, is not
         * visible from this header; confirm against the definition.
         * @param[in] size size argument (presumably the number of blocks)
         * @param[in] blockSize block size (presumably entries per block)
         * @param[in] alpha1 single scalar paired with x
         * @param[in] alpha array of scalars paired with x
         * @param[in] x first input array
         * @param[in] beta1 single scalar paired with y
         * @param[in] beta array of scalars paired with y
         * @param[in] y second input array
         * @param[out] z output array
         */
        static void

        axpbyBlocked(const size_type                            size,

                     const size_type                            blockSize,

                     const scalar_type<ValueType1, ValueType2>  alpha1,

                     const scalar_type<ValueType1, ValueType2> *alpha,

                     const ValueType1 *                         x,

                     const scalar_type<ValueType1, ValueType2>  beta1,

                     const scalar_type<ValueType1, ValueType2> *beta,

                     const ValueType2 *                         y,

                     scalar_type<ValueType1, ValueType2> *      z);


        /**
         * @brief Template for computing the dot products of the numVec vectors
         * of two multi vectors.
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in each multi vector
         * @param[in] multiVecDataX data of the first multi vector
         * @param[in] multiVecDataY data of the second multi vector
         * @param[in] opX scalar operation for the entries of X (presumably
         * applied before the product; TODO confirm)
         * @param[in] opY scalar operation for the entries of Y (presumably
         * applied before the product; TODO confirm)
         * @param[out] multiVecDotProduct output array (presumably numVec
         * entries, one per vector pair; TODO confirm)
         * @param[in] context linear algebra operation context for memorySpace
         */
        static void

        dotMultiVector(size_type                            vecSize,

                       size_type                            numVec,

                       const ValueType1 *                   multiVecDataX,

                       const ValueType2 *                   multiVecDataY,

                       const ScalarOp &                     opX,

                       const ScalarOp &                     opY,

                       scalar_type<ValueType1, ValueType2> *multiVecDotProduct,

                       LinAlgOpContext<memorySpace> &       context);

      };


      /**
       * @brief Namespace-like class (static members only) grouping multi-vector
       * norm kernels that operate on a single value type.
       *
       * Only declarations live here; the definitions are in a separate
       * translation unit that is not visible from this header.
       *
       * @tparam ValueType value type of the multi-vector entries
       * @tparam memorySpace memory space the kernels are declared for
       */
      template <typename ValueType, dftefe::utils::MemorySpace memorySpace>

      class KernelsOneValueType

      {

      public:

        /**
         * @brief Template for computing norms of all the numVec vectors in a
         * multi vector (presumably the max-absolute-value norm, going by the
         * name; TODO confirm against the definition).
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in the multi vector
         * @param[in] multiVecData multi-vector data
         * @return norms as doubles (presumably one entry per vector)
         */
        static std::vector<double>

        amaxsMultiVector(size_type        vecSize,

                         size_type        numVec,

                         const ValueType *multiVecData);


        /**
         * @brief Template for computing norms of all the numVec vectors in a
         * multi vector (presumably the 2-norm, going by the name; TODO confirm
         * against the definition).
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in the multi vector
         * @param[in] multiVecData multi-vector data
         * @param[in] context linear algebra operation context for memorySpace
         * @return norms as doubles (presumably one entry per vector)
         */
        static std::vector<double>

        nrms2MultiVector(size_type                     vecSize,

                         size_type                     numVec,

                         const ValueType *             multiVecData,

                         LinAlgOpContext<memorySpace> &context);

      };


#ifdef DFTEFE_WITH_DEVICE

      /**
       * @brief Partial specialization of KernelsTwoValueTypes for
       * dftefe::utils::MemorySpace::DEVICE; only compiled when
       * DFTEFE_WITH_DEVICE is defined.
       *
       * NOTE(review): this specialization declares only ascale (without
       * ScalarOp arguments), reciprocalX, both hadamardProduct overloads,
       * khatriRaoProduct, axpby and dotMultiVector. The primary template in
       * this header additionally declares the ScalarOp overload of ascale,
       * scaleStridedVarBatched, khatriRaoProductStridedVarBatched,
       * transposedKhatriRaoProduct and axpbyBlocked; those members do not
       * exist for DEVICE, so code naming them with this memory space will not
       * compile. Confirm whether that is intentional.
       *
       * @tparam ValueType1 value type of the first operand
       * @tparam ValueType2 value type of the second operand
       */
      template <typename ValueType1, typename ValueType2>

      class KernelsTwoValueTypes<ValueType1,

                                 ValueType2,

                                 dftefe::utils::MemorySpace::DEVICE>

      {

      public:

        /**
         * @brief Scale an array by a scalar, \f$ z_i = \alpha x_i \f$.
         * NOTE(review): formula inferred from the project docs of ascale;
         * confirm against the device definition.
         * @param[in] size number of entries in x and z
         * @param[in] alpha scalar factor
         * @param[in] x input array
         * @param[out] z output array
         */
        static void

        ascale(size_type                            size,

               ValueType1                           alpha,

               const ValueType2 *                   x,

               scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Template for performing \f$ z = 1 / x \f$, does not check if
         * x[i] is zero.
         * NOTE(review): alpha presumably scales the result
         * (z_i = alpha / x_i); confirm against the definition.
         * @param[in] size size of the array
         * @param[in] alpha scalar factor
         * @param[in] x array
         * @param[out] z array
         */

        static void

        reciprocalX(size_type                            size,

                    const ValueType1                     alpha,

                    const ValueType2 *                   x,

                    scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Template for performing \f$ z_i = x_i * y_i \f$
         * @param[in] size size of the arrays
         * @param[in] x array
         * @param[in] y array
         * @param[out] z array
         */
        static void

        hadamardProduct(size_type                            size,

                        const ValueType1 *                   x,

                        const ValueType2 *                   y,

                        scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Element-wise product of x and y with a ScalarOp supplied for
         * each operand.
         * @param[in] size size of the arrays
         * @param[in] x array
         * @param[in] y array
         * @param[in] opx scalar operation for x (presumably applied to each
         * x_i before the product; TODO confirm)
         * @param[in] opy scalar operation for y (presumably applied to each
         * y_i before the product; TODO confirm)
         * @param[out] z array
         */
        static void

        hadamardProduct(size_type                            size,

                        const ValueType1 *                   x,

                        const ValueType2 *                   y,

                        const ScalarOp &                     opx,

                        const ScalarOp &                     opy,

                        scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Khatri-Rao product of A and B written to Z.
         * NOTE(review): the matrix shapes in terms of sizeI, sizeJ, sizeK and
         * the index formula are not visible from this header; confirm against
         * the definition.
         * @param[in] layout storage layout of the matrices
         * @param[in] sizeI first extent
         * @param[in] sizeJ second extent
         * @param[in] sizeK third extent
         * @param[in] A first input matrix
         * @param[in] B second input matrix
         * @param[out] Z output matrix
         */
        static void

        khatriRaoProduct(const Layout                         layout,

                         const size_type                      sizeI,

                         const size_type                      sizeJ,

                         const size_type                      sizeK,

                         const ValueType1 *                   A,

                         const ValueType2 *                   B,

                         scalar_type<ValueType1, ValueType2> *Z);


        /**
         * @brief Linear combination of two arrays, presumably
         * \f$ z = \alpha x + \beta y \f$ (inferred from the name and the
         * parameter list; TODO confirm against the definition).
         * @param[in] size number of entries in x, y and z
         * @param[in] alpha scalar paired with x
         * @param[in] x first input array
         * @param[in] beta scalar paired with y
         * @param[in] y second input array
         * @param[out] z output array
         */
        static void

        axpby(size_type                            size,

              scalar_type<ValueType1, ValueType2>  alpha,

              const ValueType1 *                   x,

              scalar_type<ValueType1, ValueType2>  beta,

              const ValueType2 *                   y,

              scalar_type<ValueType1, ValueType2> *z);


        /**
         * @brief Template for computing the dot products of the numVec vectors
         * of two multi vectors.
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in each multi vector
         * @param[in] multiVecDataX data of the first multi vector
         * @param[in] multiVecDataY data of the second multi vector
         * @param[in] opX scalar operation for the entries of X (presumably
         * applied before the product; TODO confirm)
         * @param[in] opY scalar operation for the entries of Y (presumably
         * applied before the product; TODO confirm)
         * @param[out] multiVecDotProduct output array (presumably numVec
         * entries, one per vector pair; TODO confirm)
         * @param[in] context linear algebra operation context for the DEVICE
         * memory space
         */
        static void

        dotMultiVector(

          size_type                            vecSize,

          size_type                            numVec,

          const ValueType1 *                   multiVecDataX,

          const ValueType2 *                   multiVecDataY,

          const ScalarOp &                     opX,

          const ScalarOp &                     opY,

          scalar_type<ValueType1, ValueType2> *multiVecDotProduct,

          LinAlgOpContext<dftefe::utils::MemorySpace::DEVICE> &context);

      };


      /**
       * @brief Partial specialization of KernelsOneValueType for
       * dftefe::utils::MemorySpace::DEVICE; only compiled when
       * DFTEFE_WITH_DEVICE is defined. It declares the same two members as
       * the primary template.
       *
       * @tparam ValueType value type of the multi-vector entries
       */
      template <typename ValueType>

      class KernelsOneValueType<ValueType, dftefe::utils::MemorySpace::DEVICE>

      {

      public:

        /**
         * @brief Template for computing norms of all the numVec vectors in a
         * multi vector (presumably the max-absolute-value norm, going by the
         * name; TODO confirm against the definition).
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in the multi vector
         * @param[in] multiVecData multi-vector data
         * @return norms as doubles (presumably one entry per vector)
         */
        static std::vector<double>

        amaxsMultiVector(size_type        vecSize,

                         size_type        numVec,

                         const ValueType *multiVecData);


        /**
         * @brief Template for computing norms of all the numVec vectors in a
         * multi vector (presumably the 2-norm, going by the name; TODO confirm
         * against the definition).
         * @param[in] vecSize size of each vector
         * @param[in] numVec number of vectors in the multi vector
         * @param[in] multiVecData multi-vector data
         * @param[in] context linear algebra operation context for the DEVICE
         * memory space
         * @return norms as doubles (presumably one entry per vector)
         */
        static std::vector<double>

        nrms2MultiVector(

          size_type                                            vecSize,

          size_type                                            numVec,

          const ValueType *                                    multiVecData,

          LinAlgOpContext<dftefe::utils::MemorySpace::DEVICE> &context);

      };


#endif


    } // namespace blasLapack

  }   // namespace linearAlgebra

} // namespace dftefe


#endif // dftefeKernels_h

BlasLapackTypedef.h

LinAlgOpContext.h

MemoryManager.h

dftefe::linearAlgebra::LinAlgOpContext
Definition: LinAlgOpContext.h:38

dftefe::linearAlgebra::blasLapack::KernelsOneValueType
Definition: BlasLapackKernels.h:316

dftefe::linearAlgebra::blasLapack::KernelsOneValueType::nrms2MultiVector
static std::vector< double > nrms2MultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData, LinAlgOpContext< memorySpace > &context)
Template for computing  norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:622

dftefe::linearAlgebra::blasLapack::KernelsOneValueType::amaxsMultiVector
static std::vector< double > amaxsMultiVector(size_type vecSize, size_type numVec, const ValueType *multiVecData)
Template for computing  norms of all the numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:595

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes
namespace class for BlasLapack kernels not present in blaspp.
Definition: BlasLapackKernels.h:23

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::khatriRaoProductStridedVarBatched
static void khatriRaoProductStridedVarBatched(const Layout layout, const size_type numMats, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing khatriRao but with variable stride In column major storage format:  ,...
Definition: BlasLapackKernels.cpp:389

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::axpby
static void axpby(size_type size, scalar_type< ValueType1, ValueType2 > alpha, const ValueType1 *x, scalar_type< ValueType1, ValueType2 > beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z = \alpha x + \beta y$.
Definition: BlasLapackKernels.cpp:456

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::scaleStridedVarBatched
static void scaleStridedVarBatched(const size_type numMats, const Layout layout, const ScalarOp &scalarOpA, const ScalarOp &scalarOpB, const size_type *stridea, const size_type *strideb, const size_type *stridec, const size_type *m, const size_type *n, const size_type *k, const ValueType1 *dA, const ValueType2 *dB, scalar_type< ValueType1, ValueType2 > *dC, LinAlgOpContext< memorySpace > &context)
Template for performing hadamard product of two columns of batches of matrix A and B having num col A...
Definition: BlasLapackKernels.cpp:282

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::transposedKhatriRaoProduct
static void transposedKhatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format:  , where  is  matrix,  is ,...
Definition: BlasLapackKernels.cpp:424

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::hadamardProduct
static void hadamardProduct(size_type size, const ValueType1 *x, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z_i = x_i * y_i$.
Definition: BlasLapackKernels.cpp:184

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::khatriRaoProduct
static void khatriRaoProduct(const Layout layout, const size_type sizeI, const size_type sizeJ, const size_type sizeK, const ValueType1 *A, const ValueType2 *B, scalar_type< ValueType1, ValueType2 > *Z)
Template for performing In column major storage format:  , where  is  matrix,  is ,...
Definition: BlasLapackKernels.cpp:356

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::dotMultiVector
static void dotMultiVector(size_type vecSize, size_type numVec, const ValueType1 *multiVecDataX, const ValueType2 *multiVecDataY, const ScalarOp &opX, const ScalarOp &opY, scalar_type< ValueType1, ValueType2 > *multiVecDotProduct, LinAlgOpContext< memorySpace > &context)
Template for computing dot products numVec vectors in a multi Vector.
Definition: BlasLapackKernels.cpp:508

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::axpbyBlocked
static void axpbyBlocked(const size_type size, const size_type blockSize, const scalar_type< ValueType1, ValueType2 > alpha1, const scalar_type< ValueType1, ValueType2 > *alpha, const ValueType1 *x, const scalar_type< ValueType1, ValueType2 > beta1, const scalar_type< ValueType1, ValueType2 > *beta, const ValueType2 *y, scalar_type< ValueType1, ValueType2 > *z)
Template for performing .
Definition: BlasLapackKernels.cpp:477

dftefe::linearAlgebra::blasLapack::KernelsTwoValueTypes::ascale
static void ascale(size_type size, ValueType1 alpha, const ValueType2 *x, const ScalarOp &opalpha, const ScalarOp &opx, scalar_type< ValueType1, ValueType2 > *z)
Template for performing $z = \alpha x$.
Definition: BlasLapackKernels.cpp:91

dftefe::linearAlgebra::blasLapack::Layout
blas::Layout Layout
Definition: BlasLapackTypedef.h:46

dftefe::linearAlgebra::blasLapack::scalar_type
blas::scalar_type< ValueType1, ValueType2 > scalar_type
Definition: BlasLapackTypedef.h:70

dftefe::linearAlgebra::blasLapack::reciprocalX
void reciprocalX(size_type n, const ValueType1 alpha, ValueType2 const *x, scalar_type< ValueType1, ValueType2 > *y, LinAlgOpContext< memorySpace > &context)
Template for computing the multiplicative inverse of all the elements of x, does not check if any ele...
Definition: BlasLapack.t.cpp:83

dftefe::linearAlgebra::blasLapack::ScalarOp
ScalarOp
Definition: BlasLapackTypedef.h:56

dftefe::utils::MemorySpace
MemorySpace
Definition: MemorySpaceType.h:37

dftefe::utils::MemorySpace::DEVICE
@ DEVICE

dftefe
dealii includes
Definition: AtomFieldDataSpherical.cpp:31

dftefe::size_type
unsigned int size_type
Definition: TypeConfig.h:8