DFT-FE 1.1.0-pre
Density Functional Theory With Finite-Elements
Loading...
Searching...
No Matches
DeviceKernelLauncherHelpers.h
Go to the documentation of this file.
1// ---------------------------------------------------------------------
2//
3// Copyright (c) 2017-2025 The Regents of the University of Michigan and DFT-FE
4// authors.
5//
6// This file is part of the DFT-FE code.
7//
8// The DFT-FE code is free software; you can use it, redistribute
9// it, and/or modify it under the terms of the GNU Lesser General
10// Public License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12// The full text of the license can be found in the file LICENSE at
13// the top level of the DFT-FE distribution.
14//
15// ---------------------------------------------------------------------
16
17/*
18 * @author Ian C. Lin., Sambit Das
19 */
20#ifdef DFTFE_WITH_DEVICE
21# ifndef dftfeDeviceKernelLauncherHelpers_h
22# define dftfeDeviceKernelLauncherHelpers_h
23
24# ifdef DFTFE_WITH_DEVICE_NVIDIA
25namespace dftfe
26{
27 namespace utils
28 {
29 static const int DEVICE_WARP_SIZE = 32;
30 static const int DEVICE_MAX_BLOCK_SIZE = 1024;
31 static const int DEVICE_BLOCK_SIZE = 256;
32
33 } // namespace utils
34} // namespace dftfe
35
36# elif DFTFE_WITH_DEVICE_AMD
37
38namespace dftfe
39{
40 namespace utils
41 {
42 static const int DEVICE_WARP_SIZE = 64;
43 static const int DEVICE_MAX_BLOCK_SIZE = 1024;
44 static const int DEVICE_BLOCK_SIZE = 512;
45
46 } // namespace utils
47} // namespace dftfe
48
49# elif DFTFE_WITH_DEVICE_INTEL
50
51namespace dftfe
52{
53 namespace utils
54 {
55 static const int DEVICE_WARP_SIZE = 32;
56 static const int DEVICE_MAX_BLOCK_SIZE = 1024;
57 static const int DEVICE_BLOCK_SIZE = 256;
58
59 } // namespace utils
60} // namespace dftfe
61
62# endif
63# ifdef DFTFE_WITH_DEVICE_LANG_CUDA
64# define DFTFE_LAUNCH_KERNEL(kernel, grid, block, stream, ...) \
65 do \
66 { \
67 kernel<<<grid, block, 0, stream>>>(__VA_ARGS__); \
68 } while (0)
69# elif defined(DFTFE_WITH_DEVICE_LANG_HIP)
70# define DFTFE_LAUNCH_KERNEL(kernel, grid, block, stream, ...) \
71 do \
72 { \
73 hipLaunchKernelGGL( \
74 HIP_KERNEL_NAME(kernel), grid, block, 0, stream, __VA_ARGS__); \
75 } while (0)
76# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
77# define DFTFE_LAUNCH_KERNEL(kernel, grid, block, stream, ...) \
78 do \
79 { \
80 dftfe::utils::queueRegistry.find(stream)->second.parallel_for( \
81 sycl::nd_range<1>((grid) * (block), block), \
82 [=](sycl::nd_item<1> ind) { kernel(ind, __VA_ARGS__); }); \
83 } while (0)
84# else
85# error \
86 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
87# endif
88
89# ifdef DFTFE_WITH_DEVICE_LANG_CUDA
90# define DFTFE_LAUNCH_KERNEL_SMEM_D( \
91 kernel, grid, block, smemtype, smemcount, stream, ...) \
92 do \
93 { \
94 kernel<<<grid, block, smemcount * sizeof(smemtype), stream>>>( \
95 __VA_ARGS__); \
96 } while (0)
97# elif defined(DFTFE_WITH_DEVICE_LANG_HIP)
98# define DFTFE_LAUNCH_KERNEL_SMEM_D( \
99 kernel, grid, block, smemtype, smemcount, stream, ...) \
100 do \
101 { \
102 hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel), \
103 grid, \
104 block, \
105 smemcount * sizeof(smemtype), \
106 stream, \
107 __VA_ARGS__); \
108 } while (0)
109# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
110# define DFTFE_LAUNCH_KERNEL_SMEM_D( \
111 kernel, grid, block, smemtype, smemcount, stream, ...) \
112 do \
113 { \
114 dftfe::utils::queueRegistry.find(stream)->second.submit( \
115 [=](sycl::handler &cgh) { \
116 sycl::local_accessor<smemtype, 1> SMem_acc(smemcount, cgh); \
117 cgh.parallel_for(sycl::nd_range<1>((grid) * (block), block), \
118 [=](sycl::nd_item<1> ind) { \
119 kernel(ind, \
120 SMem_acc.get_pointer(), \
121 __VA_ARGS__); \
122 }); \
123 }); \
124 } while (0)
125# else
126# error \
127 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
128# endif
129
130# ifdef DFTFE_WITH_DEVICE_LANG_CUDA
131# define DFTFE_LAUNCH_KERNEL_SMEM_S( \
132 kernel, grid, block, smemtype, smemcount, stream, ...) \
133 do \
134 { \
135 kernel<<<grid, block, 0, stream>>>(__VA_ARGS__); \
136 } while (0)
137# elif defined(DFTFE_WITH_DEVICE_LANG_HIP)
138# define DFTFE_LAUNCH_KERNEL_SMEM_S( \
139 kernel, grid, block, smemtype, smemcount, stream, ...) \
140 do \
141 { \
142 hipLaunchKernelGGL( \
143 HIP_KERNEL_NAME(kernel), grid, block, 0, stream, __VA_ARGS__); \
144 } while (0)
145# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
146# define DFTFE_LAUNCH_KERNEL_SMEM_S( \
147 kernel, grid, block, smemtype, smemcount, stream, ...) \
148 do \
149 { \
150 dftfe::utils::queueRegistry.find(stream)->second.submit( \
151 [=](sycl::handler &cgh) { \
152 sycl::local_accessor<smemtype, 1> SMem_acc(smemcount, cgh); \
153 cgh.parallel_for(sycl::nd_range<1>((grid) * (block), block), \
154 [=](sycl::nd_item<1> ind) { \
155 kernel(ind, \
156 SMem_acc.get_pointer(), \
157 __VA_ARGS__); \
158 }); \
159 }); \
160 } while (0)
161# else
162# error \
163 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
164# endif
165
166
167# define DFTFE_KERNEL_NAME(...) __VA_ARGS__
168
169
170# if defined(DFTFE_WITH_DEVICE_LANG_CUDA) || \
171 defined(DFTFE_WITH_DEVICE_LANG_HIP)
172# define DFTFE_CREATE_KERNEL(RET, NAME, BODY, ...) \
173 __global__ RET NAME(__VA_ARGS__) \
174 { \
175 const dftfe::uInt globalThreadId = \
176 blockIdx.x * blockDim.x + threadIdx.x; \
177 const dftfe::uInt nThreadsPerBlock = blockDim.x; \
178 const dftfe::uInt nThreadBlock = gridDim.x; \
179 BODY \
180 }
181# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
182# define DFTFE_CREATE_KERNEL(RET, NAME, BODY, ...) \
183 RET NAME(sycl::nd_item<1> ind, __VA_ARGS__) \
184 { \
185 const dftfe::uInt globalThreadId = ind.get_global_id(0); \
186 const dftfe::uInt nThreadsPerBlock = ind.get_local_range(0); \
187 const dftfe::uInt nThreadBlock = ind.get_group_range(0); \
188 BODY \
189 }
190# else
191# error \
192 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
193# endif
194
195# if defined(DFTFE_WITH_DEVICE_LANG_CUDA) || \
196 defined(DFTFE_WITH_DEVICE_LANG_HIP)
197# define DFTFE_CREATE_KERNEL_SMEM_D(SMEMTYPE, RET, NAME, BODY, ...) \
198 __global__ RET NAME(__VA_ARGS__) \
199 { \
200 extern __shared__ SMEMTYPE smem[]; \
201 const dftfe::uInt globalThreadId = \
202 blockIdx.x * blockDim.x + threadIdx.x; \
203 const dftfe::uInt threadId = threadIdx.x; \
204 const dftfe::uInt blockId = blockIdx.x; \
205 const dftfe::uInt nThreadsPerBlock = blockDim.x; \
206 const dftfe::uInt nThreadBlock = gridDim.x; \
207 BODY \
208 }
209# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
210# define DFTFE_CREATE_KERNEL_SMEM_D(SMEMTYPE, RET, NAME, BODY, ...) \
211 RET NAME(sycl::nd_item<1> ind, SMEMTYPE *smem, __VA_ARGS__) \
212 { \
213 const dftfe::uInt globalThreadId = ind.get_global_id(0); \
214 const dftfe::uInt threadId = ind.get_local_id(0); \
215 const dftfe::uInt blockId = ind.get_group(0); \
216 const dftfe::uInt nThreadsPerBlock = ind.get_local_range(0); \
217 const dftfe::uInt nThreadBlock = ind.get_group_range(0); \
218 BODY \
219 }
220# else
221# error \
222 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
223# endif
224
225# if defined(DFTFE_WITH_DEVICE_LANG_CUDA) || \
226 defined(DFTFE_WITH_DEVICE_LANG_HIP)
227# define DFTFE_CREATE_KERNEL_SMEM_S( \
228 SMEMTYPE, SMEMCOUNT, RET, NAME, BODY, ...) \
229 __global__ RET NAME(__VA_ARGS__) \
230 { \
231 __shared__ SMEMTYPE smem[SMEMCOUNT]; \
232 const dftfe::uInt globalThreadId = \
233 blockIdx.x * blockDim.x + threadIdx.x; \
234 const dftfe::uInt threadId = threadIdx.x; \
235 const dftfe::uInt blockId = blockIdx.x; \
236 const dftfe::uInt nThreadsPerBlock = blockDim.x; \
237 const dftfe::uInt nThreadBlock = gridDim.x; \
238 BODY \
239 }
240# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
241# define DFTFE_CREATE_KERNEL_SMEM_S( \
242 SMEMTYPE, SMEMCOUNT, RET, NAME, BODY, ...) \
243 RET NAME(sycl::nd_item<1> ind, SMEMTYPE *smem, __VA_ARGS__) \
244 { \
245 const dftfe::uInt globalThreadId = ind.get_global_id(0); \
246 const dftfe::uInt threadId = ind.get_local_id(0); \
247 const dftfe::uInt blockId = ind.get_group(0); \
248 const dftfe::uInt nThreadsPerBlock = ind.get_local_range(0); \
249 const dftfe::uInt nThreadBlock = ind.get_group_range(0); \
250 BODY \
251 }
252# else
253# error \
254 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
255# endif
256
257
258# if defined(DFTFE_WITH_DEVICE_LANG_CUDA) || \
259 defined(DFTFE_WITH_DEVICE_LANG_HIP)
260# define SYNCTHREADS __syncthreads()
261# elif defined(DFTFE_WITH_DEVICE_LANG_SYCL)
262# define SYNCTHREADS sycl::group_barrier(ind.get_group());
263# else
264# error \
265 "No device backend defined (DFTFE_WITH_DEVICE_LANG_CUDA or DFTFE_WITH_DEVICE_LANG_HIP or DFTFE_WITH_DEVICE_LANG_SYCL)"
266# endif
267
268# endif // dftfeDeviceKernelLauncherHelpers_h
269#endif // DFTFE_WITH_DEVICE
Definition Cell.h:36
Definition pseudoPotentialToDftfeConverter.cc:34