blob: 48ca13a5c31d4ebe02d08543bf703aebeec1d20c [file] [log] [blame]
Johannes Doerfert67ab8752021-07-25 18:26:441//===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "Mapping.h"
Johannes Doerfert93bebdc2021-11-03 15:06:5713#include "Interface.h"
Johannes Doerfert67ab8752021-07-25 18:26:4414#include "State.h"
15#include "Types.h"
16#include "Utils.h"
17
Joseph Huberb4f84432022-05-09 18:22:5918#pragma omp begin declare target device_type(nohost)
Johannes Doerfert67ab8752021-07-25 18:26:4419
Jon Chesterfield842f8752021-08-23 19:25:2320#include "llvm/Frontend/OpenMP/OMPGridValues.h"
21
Johannes Doerfert67ab8752021-07-25 18:26:4422using namespace _OMP;
23
24namespace _OMP {
25namespace impl {
26
Joseph Huberb4f84432022-05-09 18:22:5927// Forward declarations defined to be defined for AMDGCN and NVPTX.
28const llvm::omp::GV &getGridValue();
29uint32_t getGridDim(uint32_t n, uint16_t d);
30uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
31 uint16_t group_size);
32uint32_t getNumHardwareThreadsInBlock();
33LaneMaskTy activemask();
34LaneMaskTy lanemaskLT();
35LaneMaskTy lanemaskGT();
36uint32_t getThreadIdInWarp();
37uint32_t getThreadIdInBlock();
38uint32_t getKernelSize();
39uint32_t getBlockId();
40uint32_t getNumberOfBlocks();
41uint32_t getWarpId();
42uint32_t getNumberOfWarpsInBlock();
43
Johannes Doerfert67ab8752021-07-25 18:26:4444/// AMDGCN Implementation
45///
46///{
47#pragma omp begin declare variant match(device = {arch(amdgcn)})
48
Joseph Huber48e3dce2022-02-14 19:01:0449static const llvm::omp::GV &getGridValue() {
Jon Chesterfield72729822021-10-19 07:05:0550 return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
Jon Chesterfield842f8752021-08-23 19:25:2351}
52
/// Return \p n divided by \p d, rounded up to the next integer.
///
/// \p d must be non-zero; no check is performed here.
uint32_t getGridDim(uint32_t n, uint16_t d) {
  const uint32_t Whole = n / d;
  const bool HasRemainder = (n % d) != 0;
  return HasRemainder ? Whole + 1 : Whole;
}
57
/// Return the size of workgroup \p group_id when \p grid_size items are split
/// into groups of \p group_size: the number of items left from the start of
/// this group onwards, capped at \p group_size.
///
/// The subtraction is done in unsigned 32-bit arithmetic, so it wraps if
/// `group_id * group_size` exceeds \p grid_size.
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
                         uint16_t group_size) {
  const uint32_t Remaining = grid_size - group_id * group_size;
  if (Remaining < group_size)
    return Remaining;
  return group_size;
}
63
Johannes Doerfert93bebdc2021-11-03 15:06:5764uint32_t getNumHardwareThreadsInBlock() {
65 return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
66 __builtin_amdgcn_grid_size_x(),
67 __builtin_amdgcn_workgroup_size_x());
68}
69
Johannes Doerfert67ab8752021-07-25 18:26:4470LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
71
72LaneMaskTy lanemaskLT() {
73 uint32_t Lane = mapping::getThreadIdInWarp();
74 int64_t Ballot = mapping::activemask();
75 uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
76 return Mask & Ballot;
77}
78
79LaneMaskTy lanemaskGT() {
80 uint32_t Lane = mapping::getThreadIdInWarp();
81 if (Lane == (mapping::getWarpSize() - 1))
82 return 0;
83 int64_t Ballot = mapping::activemask();
84 uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
85 return Mask & Ballot;
86}
87
88uint32_t getThreadIdInWarp() {
89 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
90}
91
92uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
93
Johannes Doerfert67ab8752021-07-25 18:26:4494uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
95
96uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
97
98uint32_t getNumberOfBlocks() {
99 return getGridDim(__builtin_amdgcn_grid_size_x(),
100 __builtin_amdgcn_workgroup_size_x());
101}
102
Johannes Doerfert67ab8752021-07-25 18:26:44103uint32_t getWarpId() {
Johannes Doerfert93bebdc2021-11-03 15:06:57104 return impl::getThreadIdInBlock() / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44105}
106
Johannes Doerfert67ab8752021-07-25 18:26:44107uint32_t getNumberOfWarpsInBlock() {
108 return mapping::getBlockSize() / mapping::getWarpSize();
109}
110
111#pragma omp end declare variant
112///}
113
114/// NVPTX Implementation
115///
116///{
117#pragma omp begin declare variant match( \
118 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
119
Johannes Doerfert93bebdc2021-11-03 15:06:57120uint32_t getNumHardwareThreadsInBlock() {
121 return __nvvm_read_ptx_sreg_ntid_x();
122}
123
Joseph Huber48e3dce2022-02-14 19:01:04124static const llvm::omp::GV &getGridValue() {
Jon Chesterfield842f8752021-08-23 19:25:23125 return llvm::omp::NVPTXGridValues;
126}
127
Johannes Doerfert67ab8752021-07-25 18:26:44128LaneMaskTy activemask() {
129 unsigned int Mask;
130 asm("activemask.b32 %0;" : "=r"(Mask));
131 return Mask;
132}
133
134LaneMaskTy lanemaskLT() {
135 __kmpc_impl_lanemask_t Res;
136 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
137 return Res;
138}
139
140LaneMaskTy lanemaskGT() {
141 __kmpc_impl_lanemask_t Res;
142 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
143 return Res;
144}
145
Johannes Doerfert67ab8752021-07-25 18:26:44146uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
147
Johannes Doerfert93bebdc2021-11-03 15:06:57148uint32_t getThreadIdInWarp() {
149 return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
Johannes Doerfert67ab8752021-07-25 18:26:44150}
151
Johannes Doerfert93bebdc2021-11-03 15:06:57152uint32_t getKernelSize() {
153 return __nvvm_read_ptx_sreg_nctaid_x() *
154 mapping::getNumberOfProcessorElements();
155}
Johannes Doerfert67ab8752021-07-25 18:26:44156
157uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
158
159uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
160
Johannes Doerfert67ab8752021-07-25 18:26:44161uint32_t getWarpId() {
Johannes Doerfert93bebdc2021-11-03 15:06:57162 return impl::getThreadIdInBlock() / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44163}
164
Johannes Doerfert67ab8752021-07-25 18:26:44165uint32_t getNumberOfWarpsInBlock() {
166 return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
167 mapping::getWarpSize();
168}
169
170#pragma omp end declare variant
171///}
172
Jon Chesterfield842f8752021-08-23 19:25:23173uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
174
Johannes Doerfert67ab8752021-07-25 18:26:44175} // namespace impl
176} // namespace _OMP
177
Johannes Doerfert93bebdc2021-11-03 15:06:57178/// We have to be deliberate about the distinction of `mapping::` and `impl::`
179/// below to avoid repeating assumptions or including irrelevant ones.
180///{
181
Johannes Doerfertccb5d272021-10-30 19:24:25182static bool isInLastWarp() {
Johannes Doerfert67ab8752021-07-25 18:26:44183 uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
184 ~(mapping::getWarpSize() - 1);
185 return mapping::getThreadIdInBlock() == MainTId;
186}
187
Johannes Doerfertccb5d272021-10-30 19:24:25188bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
189 if (IsSPMD || icv::Level)
190 return false;
191
192 // Check if this is the last warp in the block.
193 return isInLastWarp();
194}
195
Joseph Huber85ad5662021-10-09 01:52:54196bool mapping::isMainThreadInGenericMode() {
197 return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
198}
199
Johannes Doerfertccb5d272021-10-30 19:24:25200bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
201 if (IsSPMD)
202 return mapping::getThreadIdInBlock() == 0;
203 return isInLastWarp();
204}
205
Johannes Doerfert67ab8752021-07-25 18:26:44206bool mapping::isLeaderInWarp() {
207 __kmpc_impl_lanemask_t Active = mapping::activemask();
208 __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
209 return utils::popc(Active & LaneMaskLT) == 0;
210}
211
212LaneMaskTy mapping::activemask() { return impl::activemask(); }
213
214LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
215
216LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
217
Johannes Doerfert93bebdc2021-11-03 15:06:57218uint32_t mapping::getThreadIdInWarp() {
219 uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
220 ASSERT(ThreadIdInWarp < impl::getWarpSize());
221 return ThreadIdInWarp;
Johannes Doerfert67ab8752021-07-25 18:26:44222}
223
Johannes Doerfert93bebdc2021-11-03 15:06:57224uint32_t mapping::getThreadIdInBlock() {
225 uint32_t ThreadIdInBlock = impl::getThreadIdInBlock();
226 ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock());
227 return ThreadIdInBlock;
228}
Johannes Doerfert67ab8752021-07-25 18:26:44229
230uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
231
Johannes Doerfert57b4c522022-02-14 23:19:33232uint32_t mapping::getBlockSize(bool IsSPMD) {
Johannes Doerfert93bebdc2021-11-03 15:06:57233 uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
Johannes Doerfert57b4c522022-02-14 23:19:33234 (!IsSPMD * impl::getWarpSize());
Johannes Doerfert93bebdc2021-11-03 15:06:57235 return BlockSize;
Johannes Doerfert67ab8752021-07-25 18:26:44236}
Johannes Doerfert57b4c522022-02-14 23:19:33237uint32_t mapping::getBlockSize() {
238 return mapping::getBlockSize(mapping::isSPMDMode());
239}
Johannes Doerfert67ab8752021-07-25 18:26:44240
Johannes Doerfert93bebdc2021-11-03 15:06:57241uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
242
243uint32_t mapping::getWarpId() {
244 uint32_t WarpID = impl::getWarpId();
245 ASSERT(WarpID < impl::getNumberOfWarpsInBlock());
246 return WarpID;
247}
248
249uint32_t mapping::getBlockId() {
250 uint32_t BlockId = impl::getBlockId();
251 ASSERT(BlockId < impl::getNumberOfBlocks());
252 return BlockId;
253}
254
255uint32_t mapping::getNumberOfWarpsInBlock() {
256 uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
257 ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks);
258 return NumberOfWarpsInBlocks;
259}
260
261uint32_t mapping::getNumberOfBlocks() {
262 uint32_t NumberOfBlocks = impl::getNumberOfBlocks();
263 ASSERT(impl::getBlockId() < NumberOfBlocks);
264 return NumberOfBlocks;
265}
266
267uint32_t mapping::getNumberOfProcessorElements() {
268 uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock();
269 ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements);
270 return NumberOfProcessorElements;
271}
272
273///}
274
Johannes Doerfert67ab8752021-07-25 18:26:44275/// Execution mode
276///
277///{
Joseph Hubere2dcc222022-03-04 17:07:57278
279// TODO: This is a workaround for initialization coming from kernels outside of
280// the TU. We will need to solve this more correctly in the future.
281int __attribute__((used, retain, weak)) SHARED(IsSPMDMode);
Johannes Doerfert67ab8752021-07-25 18:26:44282
283void mapping::init(bool IsSPMD) {
Johannes Doerfertccb5d272021-10-30 19:24:25284 if (mapping::isInitialThreadInLevel0(IsSPMD))
Johannes Doerfert67ab8752021-07-25 18:26:44285 IsSPMDMode = IsSPMD;
286}
287
288bool mapping::isSPMDMode() { return IsSPMDMode; }
289
290bool mapping::isGenericMode() { return !isSPMDMode(); }
291///}
292
Joseph Hubere95731c2021-09-21 19:32:41293extern "C" {
Joseph Huber1cf86df2021-09-22 13:20:49294__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
Joseph Huber74f91742021-10-18 15:14:07295 FunctionTracingRAII();
Joseph Hubere95731c2021-09-21 19:32:41296 return mapping::getThreadIdInBlock();
297}
Joseph Huberbad44d52021-10-09 00:08:28298
299__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
Joseph Huber74f91742021-10-18 15:14:07300 FunctionTracingRAII();
Johannes Doerfert93bebdc2021-11-03 15:06:57301 return impl::getNumHardwareThreadsInBlock();
Joseph Huberbad44d52021-10-09 00:08:28302}
Atmn Patel737c4a22021-11-09 04:16:54303
304__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
305 FunctionTracingRAII();
306 return impl::getWarpSize();
307}
Joseph Hubere95731c2021-09-21 19:32:41308}
Johannes Doerfert67ab8752021-07-25 18:26:44309#pragma omp end declare target