blob: be937d1ca69e549dfbd5ab415ee1b9695af7b8a5 [file] [log] [blame]
Johannes Doerfert67ab8752021-07-25 18:26:441//===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "Mapping.h"
13#include "State.h"
14#include "Types.h"
15#include "Utils.h"
16
17#pragma omp declare target
18
Jon Chesterfield842f8752021-08-23 19:25:2319#include "llvm/Frontend/OpenMP/OMPGridValues.h"
20
Johannes Doerfert67ab8752021-07-25 18:26:4421using namespace _OMP;
22
23namespace _OMP {
24namespace impl {
25
26/// AMDGCN Implementation
27///
28///{
29#pragma omp begin declare variant match(device = {arch(amdgcn)})
30
Jon Chesterfield842f8752021-08-23 19:25:2331constexpr const llvm::omp::GV &getGridValue() {
Jon Chesterfield72729822021-10-19 07:05:0532 return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
Jon Chesterfield842f8752021-08-23 19:25:2333}
34
Johannes Doerfert67ab8752021-07-25 18:26:4435uint32_t getGridDim(uint32_t n, uint16_t d) {
36 uint32_t q = n / d;
37 return q + (n > q * d);
38}
39
40uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
41 uint16_t group_size) {
42 uint32_t r = grid_size - group_id * group_size;
43 return (r < group_size) ? r : group_size;
44}
45
46LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
47
48LaneMaskTy lanemaskLT() {
49 uint32_t Lane = mapping::getThreadIdInWarp();
50 int64_t Ballot = mapping::activemask();
51 uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
52 return Mask & Ballot;
53}
54
55LaneMaskTy lanemaskGT() {
56 uint32_t Lane = mapping::getThreadIdInWarp();
57 if (Lane == (mapping::getWarpSize() - 1))
58 return 0;
59 int64_t Ballot = mapping::activemask();
60 uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
61 return Mask & Ballot;
62}
63
64uint32_t getThreadIdInWarp() {
65 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
66}
67
68uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
69
70uint32_t getBlockSize() {
71 // TODO: verify this logic for generic mode.
72 return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
73 __builtin_amdgcn_grid_size_x(),
74 __builtin_amdgcn_workgroup_size_x());
75}
76
77uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
78
79uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
80
81uint32_t getNumberOfBlocks() {
82 return getGridDim(__builtin_amdgcn_grid_size_x(),
83 __builtin_amdgcn_workgroup_size_x());
84}
85
86uint32_t getNumberOfProcessorElements() {
Joseph Huberbad44d52021-10-09 00:08:2887 return getBlockSize();
Johannes Doerfert67ab8752021-07-25 18:26:4488}
89
90uint32_t getWarpId() {
91 return mapping::getThreadIdInBlock() / mapping::getWarpSize();
92}
93
Johannes Doerfert67ab8752021-07-25 18:26:4494uint32_t getNumberOfWarpsInBlock() {
95 return mapping::getBlockSize() / mapping::getWarpSize();
96}
97
98#pragma omp end declare variant
99///}
100
101/// NVPTX Implementation
102///
103///{
104#pragma omp begin declare variant match( \
105 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
106
Jon Chesterfield842f8752021-08-23 19:25:23107constexpr const llvm::omp::GV &getGridValue() {
108 return llvm::omp::NVPTXGridValues;
109}
110
Johannes Doerfert67ab8752021-07-25 18:26:44111LaneMaskTy activemask() {
112 unsigned int Mask;
113 asm("activemask.b32 %0;" : "=r"(Mask));
114 return Mask;
115}
116
117LaneMaskTy lanemaskLT() {
118 __kmpc_impl_lanemask_t Res;
119 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
120 return Res;
121}
122
123LaneMaskTy lanemaskGT() {
124 __kmpc_impl_lanemask_t Res;
125 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
126 return Res;
127}
128
129uint32_t getThreadIdInWarp() {
130 return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
131}
132
133uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
134
135uint32_t getBlockSize() {
136 return __nvvm_read_ptx_sreg_ntid_x() -
137 (!mapping::isSPMDMode() * mapping::getWarpSize());
138}
139
140uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); }
141
142uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
143
144uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
145
146uint32_t getNumberOfProcessorElements() {
147 return __nvvm_read_ptx_sreg_ntid_x();
148}
149
150uint32_t getWarpId() {
151 return mapping::getThreadIdInBlock() / mapping::getWarpSize();
152}
153
Johannes Doerfert67ab8752021-07-25 18:26:44154uint32_t getNumberOfWarpsInBlock() {
155 return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
156 mapping::getWarpSize();
157}
158
159#pragma omp end declare variant
160///}
161
Jon Chesterfield842f8752021-08-23 19:25:23162uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
163
Johannes Doerfert67ab8752021-07-25 18:26:44164} // namespace impl
165} // namespace _OMP
166
Johannes Doerfertccb5d272021-10-30 19:24:25167static bool isInLastWarp() {
Johannes Doerfert67ab8752021-07-25 18:26:44168 uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
169 ~(mapping::getWarpSize() - 1);
170 return mapping::getThreadIdInBlock() == MainTId;
171}
172
Johannes Doerfertccb5d272021-10-30 19:24:25173bool mapping::isMainThreadInGenericMode(bool IsSPMD) {
174 if (IsSPMD || icv::Level)
175 return false;
176
177 // Check if this is the last warp in the block.
178 return isInLastWarp();
179}
180
Joseph Huber85ad5662021-10-09 01:52:54181bool mapping::isMainThreadInGenericMode() {
182 return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
183}
184
Johannes Doerfertccb5d272021-10-30 19:24:25185bool mapping::isInitialThreadInLevel0(bool IsSPMD) {
186 if (IsSPMD)
187 return mapping::getThreadIdInBlock() == 0;
188 return isInLastWarp();
189}
190
Johannes Doerfert67ab8752021-07-25 18:26:44191bool mapping::isLeaderInWarp() {
192 __kmpc_impl_lanemask_t Active = mapping::activemask();
193 __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
194 return utils::popc(Active & LaneMaskLT) == 0;
195}
196
197LaneMaskTy mapping::activemask() { return impl::activemask(); }
198
199LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
200
201LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
202
203uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); }
204
205uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); }
206
207uint32_t mapping::getBlockSize() { return impl::getBlockSize(); }
208
209uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
210
211uint32_t mapping::getBlockId() { return impl::getBlockId(); }
212
213uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); }
214
215uint32_t mapping::getNumberOfProcessorElements() {
216 return impl::getNumberOfProcessorElements();
217}
218
219uint32_t mapping::getWarpId() { return impl::getWarpId(); }
220
221uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
222
223uint32_t mapping::getNumberOfWarpsInBlock() {
224 return impl::getNumberOfWarpsInBlock();
225}
226
227/// Execution mode
228///
229///{
230static int SHARED(IsSPMDMode);
231
232void mapping::init(bool IsSPMD) {
Johannes Doerfertccb5d272021-10-30 19:24:25233 if (mapping::isInitialThreadInLevel0(IsSPMD))
Johannes Doerfert67ab8752021-07-25 18:26:44234 IsSPMDMode = IsSPMD;
235}
236
237bool mapping::isSPMDMode() { return IsSPMDMode; }
238
239bool mapping::isGenericMode() { return !isSPMDMode(); }
240///}
241
Joseph Hubere95731c2021-09-21 19:32:41242extern "C" {
Joseph Huber1cf86df2021-09-22 13:20:49243__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
Joseph Huber74f91742021-10-18 15:14:07244 FunctionTracingRAII();
Joseph Hubere95731c2021-09-21 19:32:41245 return mapping::getThreadIdInBlock();
246}
Joseph Huberbad44d52021-10-09 00:08:28247
248__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
Joseph Huber74f91742021-10-18 15:14:07249 FunctionTracingRAII();
Joseph Huberbad44d52021-10-09 00:08:28250 return mapping::getNumberOfProcessorElements();
251}
Joseph Hubere95731c2021-09-21 19:32:41252}
Johannes Doerfert67ab8752021-07-25 18:26:44253#pragma omp end declare target