blob: 2f50530e79a1d0573b5bc98be3b3649c75cba6dc [file] [log] [blame]
Johannes Doerfert67ab8752021-07-25 18:26:441//===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "Mapping.h"
Johannes Doerfert93bebdc2021-11-03 15:06:5713#include "Interface.h"
Johannes Doerfert67ab8752021-07-25 18:26:4414#include "State.h"
15#include "Types.h"
16#include "Utils.h"
17
Joseph Huberb4f84432022-05-09 18:22:5918#pragma omp begin declare target device_type(nohost)
Ron Liebermanb09a5e52022-11-29 21:19:4419
Jon Chesterfield842f8752021-08-23 19:25:2320#include "llvm/Frontend/OpenMP/OMPGridValues.h"
21
Johannes Doerfert2b5a99b2022-12-19 18:54:1522using namespace ompx;
Johannes Doerfert67ab8752021-07-25 18:26:4423
Johannes Doerfert2b5a99b2022-12-19 18:54:1524namespace ompx {
Johannes Doerfert67ab8752021-07-25 18:26:4425namespace impl {
26
// Forward declarations of functions that are defined separately for AMDGCN and
// NVPTX below.
28const llvm::omp::GV &getGridValue();
Joseph Huberb4f84432022-05-09 18:22:5929LaneMaskTy activemask();
30LaneMaskTy lanemaskLT();
31LaneMaskTy lanemaskGT();
32uint32_t getThreadIdInWarp();
Johannes Doerfert1f3a28d2023-07-26 22:20:2033uint32_t getThreadIdInBlock(int32_t Dim);
34uint32_t getNumberOfThreadsInBlock(int32_t Dim);
35uint32_t getNumberOfThreadsInKernel();
36uint32_t getBlockIdInKernel(int32_t Dim);
37uint32_t getNumberOfBlocksInKernel(int32_t Dim);
38uint32_t getWarpIdInBlock();
Joseph Huberb4f84432022-05-09 18:22:5939uint32_t getNumberOfWarpsInBlock();
40
Johannes Doerfert67ab8752021-07-25 18:26:4441/// AMDGCN Implementation
42///
43///{
44#pragma omp begin declare variant match(device = {arch(amdgcn)})
45
Joseph Huberce0caf42022-05-10 21:33:4146const llvm::omp::GV &getGridValue() {
Jon Chesterfield72729822021-10-19 07:05:0547 return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
Jon Chesterfield842f8752021-08-23 19:25:2348}
49
Johannes Doerfert1f3a28d2023-07-26 22:20:2050uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
51 switch (Dim) {
52 case 0:
53 return __builtin_amdgcn_workgroup_size_x();
54 case 1:
55 return __builtin_amdgcn_workgroup_size_y();
56 case 2:
57 return __builtin_amdgcn_workgroup_size_z();
58 };
59 UNREACHABLE("Dim outside range!");
Johannes Doerfert93bebdc2021-11-03 15:06:5760}
61
Johannes Doerfert67ab8752021-07-25 18:26:4462LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
63
64LaneMaskTy lanemaskLT() {
65 uint32_t Lane = mapping::getThreadIdInWarp();
66 int64_t Ballot = mapping::activemask();
67 uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
68 return Mask & Ballot;
69}
70
71LaneMaskTy lanemaskGT() {
72 uint32_t Lane = mapping::getThreadIdInWarp();
73 if (Lane == (mapping::getWarpSize() - 1))
74 return 0;
75 int64_t Ballot = mapping::activemask();
76 uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
77 return Mask & Ballot;
78}
79
80uint32_t getThreadIdInWarp() {
81 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
82}
83
Johannes Doerfert1f3a28d2023-07-26 22:20:2084uint32_t getThreadIdInBlock(int32_t Dim) {
85 switch (Dim) {
86 case 0:
87 return __builtin_amdgcn_workitem_id_x();
88 case 1:
89 return __builtin_amdgcn_workitem_id_y();
90 case 2:
91 return __builtin_amdgcn_workitem_id_z();
92 };
93 UNREACHABLE("Dim outside range!");
Ethan Stewart85c2d92b2022-11-02 16:37:4294}
Johannes Doerfert67ab8752021-07-25 18:26:4495
Johannes Doerfert1f3a28d2023-07-26 22:20:2096uint32_t getNumberOfThreadsInKernel() {
97 return __builtin_amdgcn_grid_size_x() * __builtin_amdgcn_grid_size_y() *
98 __builtin_amdgcn_grid_size_z();
99}
100
101uint32_t getBlockIdInKernel(int32_t Dim) {
102 switch (Dim) {
103 case 0:
104 return __builtin_amdgcn_workgroup_id_x();
105 case 1:
106 return __builtin_amdgcn_workgroup_id_y();
107 case 2:
108 return __builtin_amdgcn_workgroup_id_z();
109 };
110 UNREACHABLE("Dim outside range!");
111}
112
113uint32_t getNumberOfBlocksInKernel(int32_t Dim) {
114 switch (Dim) {
115 case 0:
116 return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
117 case 1:
118 return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();
119 case 2:
120 return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
121 };
122 UNREACHABLE("Dim outside range!");
123}
124
125uint32_t getWarpIdInBlock() {
126 return impl::getThreadIdInBlock(mapping::DIM_X) / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44127}
128
Johannes Doerfert67ab8752021-07-25 18:26:44129uint32_t getNumberOfWarpsInBlock() {
Johannes Doerfert1f3a28d2023-07-26 22:20:20130 return mapping::getNumberOfThreadsInBlock() / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44131}
132
133#pragma omp end declare variant
134///}
135
136/// NVPTX Implementation
137///
138///{
139#pragma omp begin declare variant match( \
Joseph Huber47800a12023-05-23 16:09:16140 device = {arch(nvptx, nvptx64)}, \
141 implementation = {extension(match_any)})
Johannes Doerfert67ab8752021-07-25 18:26:44142
Johannes Doerfert1f3a28d2023-07-26 22:20:20143uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
144 switch (Dim) {
145 case 0:
146 return __nvvm_read_ptx_sreg_ntid_x();
147 case 1:
148 return __nvvm_read_ptx_sreg_ntid_y();
149 case 2:
150 return __nvvm_read_ptx_sreg_ntid_z();
151 };
152 UNREACHABLE("Dim outside range!");
Johannes Doerfert93bebdc2021-11-03 15:06:57153}
154
Joseph Huberce0caf42022-05-10 21:33:41155const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
Jon Chesterfield842f8752021-08-23 19:25:23156
Johannes Doerfert67ab8752021-07-25 18:26:44157LaneMaskTy activemask() {
158 unsigned int Mask;
159 asm("activemask.b32 %0;" : "=r"(Mask));
160 return Mask;
161}
162
163LaneMaskTy lanemaskLT() {
164 __kmpc_impl_lanemask_t Res;
165 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
166 return Res;
167}
168
169LaneMaskTy lanemaskGT() {
170 __kmpc_impl_lanemask_t Res;
171 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
172 return Res;
173}
174
Johannes Doerfert1f3a28d2023-07-26 22:20:20175uint32_t getThreadIdInBlock(int32_t Dim) {
176 switch (Dim) {
177 case 0:
178 return __nvvm_read_ptx_sreg_tid_x();
179 case 1:
180 return __nvvm_read_ptx_sreg_tid_y();
181 case 2:
182 return __nvvm_read_ptx_sreg_tid_z();
183 };
184 UNREACHABLE("Dim outside range!");
185}
Johannes Doerfert67ab8752021-07-25 18:26:44186
Johannes Doerfert93bebdc2021-11-03 15:06:57187uint32_t getThreadIdInWarp() {
Johannes Doerfert1f3a28d2023-07-26 22:20:20188 return impl::getThreadIdInBlock(mapping::DIM_X) &
189 (mapping::getWarpSize() - 1);
Johannes Doerfert67ab8752021-07-25 18:26:44190}
191
Johannes Doerfert1f3a28d2023-07-26 22:20:20192uint32_t getBlockIdInKernel(int32_t Dim) {
193 switch (Dim) {
194 case 0:
195 return __nvvm_read_ptx_sreg_ctaid_x();
196 case 1:
197 return __nvvm_read_ptx_sreg_ctaid_y();
198 case 2:
199 return __nvvm_read_ptx_sreg_ctaid_z();
200 };
201 UNREACHABLE("Dim outside range!");
Johannes Doerfert93bebdc2021-11-03 15:06:57202}
Johannes Doerfert67ab8752021-07-25 18:26:44203
Johannes Doerfert1f3a28d2023-07-26 22:20:20204uint32_t getNumberOfBlocksInKernel(int32_t Dim) {
205 switch (Dim) {
206 case 0:
207 return __nvvm_read_ptx_sreg_nctaid_x();
208 case 1:
209 return __nvvm_read_ptx_sreg_nctaid_y();
210 case 2:
211 return __nvvm_read_ptx_sreg_nctaid_z();
212 };
213 UNREACHABLE("Dim outside range!");
214}
Johannes Doerfert67ab8752021-07-25 18:26:44215
Johannes Doerfert1f3a28d2023-07-26 22:20:20216uint32_t getNumberOfThreadsInKernel() {
217 return impl::getNumberOfThreadsInBlock(0) *
218 impl::getNumberOfBlocksInKernel(0) *
219 impl::getNumberOfThreadsInBlock(1) *
220 impl::getNumberOfBlocksInKernel(1) *
221 impl::getNumberOfThreadsInBlock(2) *
222 impl::getNumberOfBlocksInKernel(2);
223}
Johannes Doerfert67ab8752021-07-25 18:26:44224
Johannes Doerfert1f3a28d2023-07-26 22:20:20225uint32_t getWarpIdInBlock() {
226 return impl::getThreadIdInBlock(mapping::DIM_X) / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44227}
228
Johannes Doerfert67ab8752021-07-25 18:26:44229uint32_t getNumberOfWarpsInBlock() {
Johannes Doerfert1f3a28d2023-07-26 22:20:20230 return (mapping::getNumberOfThreadsInBlock() + mapping::getWarpSize() - 1) /
Johannes Doerfert67ab8752021-07-25 18:26:44231 mapping::getWarpSize();
232}
233
234#pragma omp end declare variant
235///}
236
Jon Chesterfield842f8752021-08-23 19:25:23237uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
238
Johannes Doerfert67ab8752021-07-25 18:26:44239} // namespace impl
Johannes Doerfert2b5a99b2022-12-19 18:54:15240} // namespace ompx
Johannes Doerfert67ab8752021-07-25 18:26:44241
Johannes Doerfert93bebdc2021-11-03 15:06:57242/// We have to be deliberate about the distinction of `mapping::` and `impl::`
243/// below to avoid repeating assumptions or including irrelevant ones.
244///{
245
Johannes Doerfertccb5d272021-10-30 19:24:25246static bool isInLastWarp() {
Johannes Doerfert1f3a28d2023-07-26 22:20:20247 uint32_t MainTId = (mapping::getNumberOfThreadsInBlock() - 1) &
Johannes Doerfert67ab8752021-07-25 18:26:44248 ~(mapping::getWarpSize() - 1);
249 return mapping::getThreadIdInBlock() == MainTId;
250}
251
Johannes Doerfertccb5d272021-10-30 19:24:25252bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
253 if (IsSPMD || icv::Level)
254 return false;
255
256 // Check if this is the last warp in the block.
257 return isInLastWarp();
258}
259
Joseph Huber85ad5662021-10-09 01:52:54260bool mapping::isMainThreadInGenericMode() {
261 return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
262}
263
Johannes Doerfertccb5d272021-10-30 19:24:25264bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
265 if (IsSPMD)
266 return mapping::getThreadIdInBlock() == 0;
267 return isInLastWarp();
268}
269
Johannes Doerfert67ab8752021-07-25 18:26:44270bool mapping::isLeaderInWarp() {
271 __kmpc_impl_lanemask_t Active = mapping::activemask();
272 __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
273 return utils::popc(Active & LaneMaskLT) == 0;
274}
275
276LaneMaskTy mapping::activemask() { return impl::activemask(); }
277
278LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
279
280LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
281
Johannes Doerfert93bebdc2021-11-03 15:06:57282uint32_t mapping::getThreadIdInWarp() {
283 uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
Johannes Doerfert88a68de2023-07-18 23:05:08284 ASSERT(ThreadIdInWarp < impl::getWarpSize(), nullptr);
Johannes Doerfert93bebdc2021-11-03 15:06:57285 return ThreadIdInWarp;
Johannes Doerfert67ab8752021-07-25 18:26:44286}
287
Johannes Doerfert1f3a28d2023-07-26 22:20:20288uint32_t mapping::getThreadIdInBlock(int32_t Dim) {
289 uint32_t ThreadIdInBlock = impl::getThreadIdInBlock(Dim);
Johannes Doerfert93bebdc2021-11-03 15:06:57290 return ThreadIdInBlock;
291}
Johannes Doerfert67ab8752021-07-25 18:26:44292
293uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
294
Johannes Doerfert1f3a28d2023-07-26 22:20:20295uint32_t mapping::getMaxTeamThreads(bool IsSPMD) {
296 uint32_t BlockSize = mapping::getNumberOfThreadsInBlock();
297 // If we are in SPMD mode, remove one warp.
298 return BlockSize - (!IsSPMD * impl::getWarpSize());
Johannes Doerfert67ab8752021-07-25 18:26:44299}
Johannes Doerfert1f3a28d2023-07-26 22:20:20300uint32_t mapping::getMaxTeamThreads() {
301 return mapping::getMaxTeamThreads(mapping::isSPMDMode());
Johannes Doerfert57b4c522022-02-14 23:19:33302}
Johannes Doerfert67ab8752021-07-25 18:26:44303
Johannes Doerfert1f3a28d2023-07-26 22:20:20304uint32_t mapping::getNumberOfThreadsInBlock(int32_t Dim) {
305 return impl::getNumberOfThreadsInBlock(Dim);
306}
Johannes Doerfert93bebdc2021-11-03 15:06:57307
Johannes Doerfert1f3a28d2023-07-26 22:20:20308uint32_t mapping::getNumberOfThreadsInKernel() {
309 return impl::getNumberOfThreadsInKernel();
310}
311
312uint32_t mapping::getWarpIdInBlock() {
313 uint32_t WarpID = impl::getWarpIdInBlock();
Johannes Doerfert88a68de2023-07-18 23:05:08314 ASSERT(WarpID < impl::getNumberOfWarpsInBlock(), nullptr);
Johannes Doerfert93bebdc2021-11-03 15:06:57315 return WarpID;
316}
317
Johannes Doerfert1f3a28d2023-07-26 22:20:20318uint32_t mapping::getBlockIdInKernel(int32_t Dim) {
319 uint32_t BlockId = impl::getBlockIdInKernel(Dim);
320 ASSERT(BlockId < impl::getNumberOfBlocksInKernel(Dim), nullptr);
Johannes Doerfert93bebdc2021-11-03 15:06:57321 return BlockId;
322}
323
324uint32_t mapping::getNumberOfWarpsInBlock() {
325 uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
Johannes Doerfert1f3a28d2023-07-26 22:20:20326 ASSERT(impl::getWarpIdInBlock() < NumberOfWarpsInBlocks, nullptr);
Johannes Doerfert93bebdc2021-11-03 15:06:57327 return NumberOfWarpsInBlocks;
328}
329
Johannes Doerfert1f3a28d2023-07-26 22:20:20330uint32_t mapping::getNumberOfBlocksInKernel(int32_t Dim) {
331 uint32_t NumberOfBlocks = impl::getNumberOfBlocksInKernel(Dim);
332 ASSERT(impl::getBlockIdInKernel(Dim) < NumberOfBlocks, nullptr);
Johannes Doerfert93bebdc2021-11-03 15:06:57333 return NumberOfBlocks;
334}
335
Johannes Doerfert1f3a28d2023-07-26 22:20:20336uint32_t mapping::getNumberOfProcessorElements() { __builtin_trap(); }
Johannes Doerfert93bebdc2021-11-03 15:06:57337
338///}
339
Johannes Doerfert67ab8752021-07-25 18:26:44340/// Execution mode
341///
342///{
Joseph Hubere2dcc222022-03-04 17:07:57343
344// TODO: This is a workaround for initialization coming from kernels outside of
345// the TU. We will need to solve this more correctly in the future.
Joseph Huber92233152022-10-05 16:03:24346int __attribute__((weak)) SHARED(IsSPMDMode);
Johannes Doerfert67ab8752021-07-25 18:26:44347
348void mapping::init(bool IsSPMD) {
Johannes Doerfertccb5d272021-10-30 19:24:25349 if (mapping::isInitialThreadInLevel0(IsSPMD))
Johannes Doerfert67ab8752021-07-25 18:26:44350 IsSPMDMode = IsSPMD;
351}
352
353bool mapping::isSPMDMode() { return IsSPMDMode; }
354
355bool mapping::isGenericMode() { return !isSPMDMode(); }
356///}
357
Joseph Hubere95731c2021-09-21 19:32:41358extern "C" {
Joseph Huberb08369f2022-07-27 15:04:25359__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
Joseph Huber74f91742021-10-18 15:14:07360 FunctionTracingRAII();
Joseph Hubere95731c2021-09-21 19:32:41361 return mapping::getThreadIdInBlock();
362}
Joseph Huberbad44d52021-10-09 00:08:28363
Joseph Huberb08369f2022-07-27 15:04:25364__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
Joseph Huber74f91742021-10-18 15:14:07365 FunctionTracingRAII();
Johannes Doerfert1f3a28d2023-07-26 22:20:20366 return impl::getNumberOfThreadsInBlock(mapping::DIM_X);
Joseph Huberbad44d52021-10-09 00:08:28367}
Atmn Patel737c4a22021-11-09 04:16:54368
Joseph Huberb08369f2022-07-27 15:04:25369__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
Atmn Patel737c4a22021-11-09 04:16:54370 FunctionTracingRAII();
371 return impl::getWarpSize();
372}
Joseph Hubere95731c2021-09-21 19:32:41373}
Johannes Doerfert1f3a28d2023-07-26 22:20:20374
Johannes Doerfertdaef6d32023-07-27 00:25:39375#define _TGT_KERNEL_LANGUAGE(NAME, MAPPER_NAME) \
376 extern "C" int ompx_##NAME(int Dim) { return mapping::MAPPER_NAME(Dim); }
377
378_TGT_KERNEL_LANGUAGE(thread_id, getThreadIdInBlock)
379_TGT_KERNEL_LANGUAGE(thread_dim, getNumberOfThreadsInBlock)
380_TGT_KERNEL_LANGUAGE(block_id, getBlockIdInKernel)
381_TGT_KERNEL_LANGUAGE(block_dim, getNumberOfBlocksInKernel)
382
Johannes Doerfert67ab8752021-07-25 18:26:44383#pragma omp end declare target