blob: 23615cb2f4ed1320eb3eb634f94c182c0c9f42a7 [file] [log] [blame]
Johannes Doerfert67ab8752021-07-25 18:26:441//===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "Mapping.h"
Johannes Doerfert93bebdc2021-11-03 15:06:5713#include "Interface.h"
Johannes Doerfert67ab8752021-07-25 18:26:4414#include "State.h"
15#include "Types.h"
16#include "Utils.h"
17
18#pragma omp declare target
19
Jon Chesterfield842f8752021-08-23 19:25:2320#include "llvm/Frontend/OpenMP/OMPGridValues.h"
21
Johannes Doerfert67ab8752021-07-25 18:26:4422using namespace _OMP;
23
24namespace _OMP {
25namespace impl {
26
27/// AMDGCN Implementation
28///
29///{
30#pragma omp begin declare variant match(device = {arch(amdgcn)})
31
Joseph Huber48e3dce2022-02-14 19:01:0432static const llvm::omp::GV &getGridValue() {
Jon Chesterfield72729822021-10-19 07:05:0533 return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
Jon Chesterfield842f8752021-08-23 19:25:2334}
35
/// Number of groups of size \p d needed to cover \p n items, i.e. `n / d`
/// rounded up. \p d must be non-zero.
uint32_t getGridDim(uint32_t n, uint16_t d) {
  const uint32_t Whole = n / d;
  const bool HasRemainder = (n % d) != 0;
  // A non-zero remainder means one extra, partially filled group.
  return HasRemainder ? Whole + 1 : Whole;
}
40
/// Size of the group with index \p group_id when \p grid_size items are split
/// into groups of \p group_size: the items remaining from this group's start,
/// capped at \p group_size. The subtraction is unsigned, so a start offset
/// beyond \p grid_size wraps and the result is \p group_size.
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
                         uint16_t group_size) {
  const uint32_t StartOffset = group_id * group_size;
  const uint32_t Remaining = grid_size - StartOffset;
  if (Remaining < group_size)
    return Remaining;
  return group_size;
}
46
Johannes Doerfert93bebdc2021-11-03 15:06:5747uint32_t getNumHardwareThreadsInBlock() {
48 return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
49 __builtin_amdgcn_grid_size_x(),
50 __builtin_amdgcn_workgroup_size_x());
51}
52
Johannes Doerfert67ab8752021-07-25 18:26:4453LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
54
55LaneMaskTy lanemaskLT() {
56 uint32_t Lane = mapping::getThreadIdInWarp();
57 int64_t Ballot = mapping::activemask();
58 uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
59 return Mask & Ballot;
60}
61
62LaneMaskTy lanemaskGT() {
63 uint32_t Lane = mapping::getThreadIdInWarp();
64 if (Lane == (mapping::getWarpSize() - 1))
65 return 0;
66 int64_t Ballot = mapping::activemask();
67 uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
68 return Mask & Ballot;
69}
70
71uint32_t getThreadIdInWarp() {
72 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
73}
74
75uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
76
Johannes Doerfert67ab8752021-07-25 18:26:4477uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
78
79uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
80
81uint32_t getNumberOfBlocks() {
82 return getGridDim(__builtin_amdgcn_grid_size_x(),
83 __builtin_amdgcn_workgroup_size_x());
84}
85
Johannes Doerfert67ab8752021-07-25 18:26:4486uint32_t getWarpId() {
Johannes Doerfert93bebdc2021-11-03 15:06:5787 return impl::getThreadIdInBlock() / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:4488}
89
Johannes Doerfert67ab8752021-07-25 18:26:4490uint32_t getNumberOfWarpsInBlock() {
91 return mapping::getBlockSize() / mapping::getWarpSize();
92}
93
94#pragma omp end declare variant
95///}
96
97/// NVPTX Implementation
98///
99///{
100#pragma omp begin declare variant match( \
101 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
102
Johannes Doerfert93bebdc2021-11-03 15:06:57103uint32_t getNumHardwareThreadsInBlock() {
104 return __nvvm_read_ptx_sreg_ntid_x();
105}
106
Joseph Huber48e3dce2022-02-14 19:01:04107static const llvm::omp::GV &getGridValue() {
Jon Chesterfield842f8752021-08-23 19:25:23108 return llvm::omp::NVPTXGridValues;
109}
110
Johannes Doerfert67ab8752021-07-25 18:26:44111LaneMaskTy activemask() {
112 unsigned int Mask;
113 asm("activemask.b32 %0;" : "=r"(Mask));
114 return Mask;
115}
116
117LaneMaskTy lanemaskLT() {
118 __kmpc_impl_lanemask_t Res;
119 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
120 return Res;
121}
122
123LaneMaskTy lanemaskGT() {
124 __kmpc_impl_lanemask_t Res;
125 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
126 return Res;
127}
128
Johannes Doerfert67ab8752021-07-25 18:26:44129uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
130
Johannes Doerfert93bebdc2021-11-03 15:06:57131uint32_t getThreadIdInWarp() {
132 return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
Johannes Doerfert67ab8752021-07-25 18:26:44133}
134
Johannes Doerfert93bebdc2021-11-03 15:06:57135uint32_t getKernelSize() {
136 return __nvvm_read_ptx_sreg_nctaid_x() *
137 mapping::getNumberOfProcessorElements();
138}
Johannes Doerfert67ab8752021-07-25 18:26:44139
140uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
141
142uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
143
Johannes Doerfert67ab8752021-07-25 18:26:44144uint32_t getWarpId() {
Johannes Doerfert93bebdc2021-11-03 15:06:57145 return impl::getThreadIdInBlock() / mapping::getWarpSize();
Johannes Doerfert67ab8752021-07-25 18:26:44146}
147
Johannes Doerfert67ab8752021-07-25 18:26:44148uint32_t getNumberOfWarpsInBlock() {
149 return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
150 mapping::getWarpSize();
151}
152
153#pragma omp end declare variant
154///}
155
Jon Chesterfield842f8752021-08-23 19:25:23156uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
157
Johannes Doerfert67ab8752021-07-25 18:26:44158} // namespace impl
159} // namespace _OMP
160
Johannes Doerfert93bebdc2021-11-03 15:06:57161/// We have to be deliberate about the distinction of `mapping::` and `impl::`
162/// below to avoid repeating assumptions or including irrelevant ones.
163///{
164
Johannes Doerfertccb5d272021-10-30 19:24:25165static bool isInLastWarp() {
Johannes Doerfert67ab8752021-07-25 18:26:44166 uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
167 ~(mapping::getWarpSize() - 1);
168 return mapping::getThreadIdInBlock() == MainTId;
169}
170
Johannes Doerfertccb5d272021-10-30 19:24:25171bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
172 if (IsSPMD || icv::Level)
173 return false;
174
175 // Check if this is the last warp in the block.
176 return isInLastWarp();
177}
178
Joseph Huber85ad5662021-10-09 01:52:54179bool mapping::isMainThreadInGenericMode() {
180 return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
181}
182
Johannes Doerfertccb5d272021-10-30 19:24:25183bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
184 if (IsSPMD)
185 return mapping::getThreadIdInBlock() == 0;
186 return isInLastWarp();
187}
188
Johannes Doerfert67ab8752021-07-25 18:26:44189bool mapping::isLeaderInWarp() {
190 __kmpc_impl_lanemask_t Active = mapping::activemask();
191 __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
192 return utils::popc(Active & LaneMaskLT) == 0;
193}
194
195LaneMaskTy mapping::activemask() { return impl::activemask(); }
196
197LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
198
199LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
200
Johannes Doerfert93bebdc2021-11-03 15:06:57201uint32_t mapping::getThreadIdInWarp() {
202 uint32_t ThreadIdInWarp = impl::getThreadIdInWarp();
203 ASSERT(ThreadIdInWarp < impl::getWarpSize());
204 return ThreadIdInWarp;
Johannes Doerfert67ab8752021-07-25 18:26:44205}
206
Johannes Doerfert93bebdc2021-11-03 15:06:57207uint32_t mapping::getThreadIdInBlock() {
208 uint32_t ThreadIdInBlock = impl::getThreadIdInBlock();
209 ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock());
210 return ThreadIdInBlock;
211}
Johannes Doerfert67ab8752021-07-25 18:26:44212
213uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
214
Johannes Doerfert57b4c522022-02-14 23:19:33215uint32_t mapping::getBlockSize(bool IsSPMD) {
Johannes Doerfert93bebdc2021-11-03 15:06:57216 uint32_t BlockSize = mapping::getNumberOfProcessorElements() -
Johannes Doerfert57b4c522022-02-14 23:19:33217 (!IsSPMD * impl::getWarpSize());
Johannes Doerfert93bebdc2021-11-03 15:06:57218 return BlockSize;
Johannes Doerfert67ab8752021-07-25 18:26:44219}
Johannes Doerfert57b4c522022-02-14 23:19:33220uint32_t mapping::getBlockSize() {
221 return mapping::getBlockSize(mapping::isSPMDMode());
222}
Johannes Doerfert67ab8752021-07-25 18:26:44223
Johannes Doerfert93bebdc2021-11-03 15:06:57224uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
225
226uint32_t mapping::getWarpId() {
227 uint32_t WarpID = impl::getWarpId();
228 ASSERT(WarpID < impl::getNumberOfWarpsInBlock());
229 return WarpID;
230}
231
232uint32_t mapping::getBlockId() {
233 uint32_t BlockId = impl::getBlockId();
234 ASSERT(BlockId < impl::getNumberOfBlocks());
235 return BlockId;
236}
237
238uint32_t mapping::getNumberOfWarpsInBlock() {
239 uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock();
240 ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks);
241 return NumberOfWarpsInBlocks;
242}
243
244uint32_t mapping::getNumberOfBlocks() {
245 uint32_t NumberOfBlocks = impl::getNumberOfBlocks();
246 ASSERT(impl::getBlockId() < NumberOfBlocks);
247 return NumberOfBlocks;
248}
249
250uint32_t mapping::getNumberOfProcessorElements() {
251 uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock();
252 ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements);
253 return NumberOfProcessorElements;
254}
255
256///}
257
Johannes Doerfert67ab8752021-07-25 18:26:44258/// Execution mode
259///
260///{
261static int SHARED(IsSPMDMode);
262
263void mapping::init(bool IsSPMD) {
Johannes Doerfertccb5d272021-10-30 19:24:25264 if (mapping::isInitialThreadInLevel0(IsSPMD))
Johannes Doerfert67ab8752021-07-25 18:26:44265 IsSPMDMode = IsSPMD;
266}
267
268bool mapping::isSPMDMode() { return IsSPMDMode; }
269
270bool mapping::isGenericMode() { return !isSPMDMode(); }
271///}
272
Joseph Hubere95731c2021-09-21 19:32:41273extern "C" {
Joseph Huber1cf86df2021-09-22 13:20:49274__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
Joseph Huber74f91742021-10-18 15:14:07275 FunctionTracingRAII();
Joseph Hubere95731c2021-09-21 19:32:41276 return mapping::getThreadIdInBlock();
277}
Joseph Huberbad44d52021-10-09 00:08:28278
279__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
Joseph Huber74f91742021-10-18 15:14:07280 FunctionTracingRAII();
Johannes Doerfert93bebdc2021-11-03 15:06:57281 return impl::getNumHardwareThreadsInBlock();
Joseph Huberbad44d52021-10-09 00:08:28282}
Atmn Patel737c4a22021-11-09 04:16:54283
284__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
285 FunctionTracingRAII();
286 return impl::getWarpSize();
287}
Joseph Hubere95731c2021-09-21 19:32:41288}
Johannes Doerfert67ab8752021-07-25 18:26:44289#pragma omp end declare target