blob: fc3ca637ef4c6a7138e1188a76cc0aa7dc38dc30 [file] [log] [blame]
//===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#include "Mapping.h"
13#include "State.h"
14#include "Types.h"
15#include "Utils.h"
16
17#pragma omp declare target
18
19using namespace _OMP;
20
21namespace _OMP {
22namespace impl {
23
24/// AMDGCN Implementation
25///
26///{
27#pragma omp begin declare variant match(device = {arch(amdgcn)})
28
/// Return how many groups of size \p d are needed to cover \p n items, i.e.
/// the quotient n / d rounded up. \p d must be non-zero.
uint32_t getGridDim(uint32_t n, uint16_t d) {
  const uint32_t FullGroups = n / d;
  // A non-empty remainder needs one additional, partially filled group.
  const bool HasPartialGroup = FullGroups * d < n;
  return HasPartialGroup ? FullGroups + 1 : FullGroups;
}
33
/// Return the number of items handled by group \p group_id when \p grid_size
/// items are split into groups of \p group_size. Every group is full except
/// possibly the last one, which only receives the remaining items.
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
                         uint16_t group_size) {
  const uint32_t FirstItem = group_id * group_size;
  const uint32_t Remaining = grid_size - FirstItem;
  if (Remaining < group_size)
    return Remaining;
  return group_size;
}
39
40LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); }
41
42LaneMaskTy lanemaskLT() {
43 uint32_t Lane = mapping::getThreadIdInWarp();
44 int64_t Ballot = mapping::activemask();
45 uint64_t Mask = ((uint64_t)1 << Lane) - (uint64_t)1;
46 return Mask & Ballot;
47}
48
49LaneMaskTy lanemaskGT() {
50 uint32_t Lane = mapping::getThreadIdInWarp();
51 if (Lane == (mapping::getWarpSize() - 1))
52 return 0;
53 int64_t Ballot = mapping::activemask();
54 uint64_t Mask = (~((uint64_t)0)) << (Lane + 1);
55 return Mask & Ballot;
56}
57
58uint32_t getThreadIdInWarp() {
59 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
60}
61
62uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
63
64uint32_t getBlockSize() {
65 // TODO: verify this logic for generic mode.
66 return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
67 __builtin_amdgcn_grid_size_x(),
68 __builtin_amdgcn_workgroup_size_x());
69}
70
71uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
72
73uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
74
75uint32_t getNumberOfBlocks() {
76 return getGridDim(__builtin_amdgcn_grid_size_x(),
77 __builtin_amdgcn_workgroup_size_x());
78}
79
80uint32_t getNumberOfProcessorElements() {
81 // TODO
82 return mapping::getBlockSize();
83}
84
85uint32_t getWarpId() {
86 return mapping::getThreadIdInBlock() / mapping::getWarpSize();
87}
88
/// Return the warp size used by the amdgcn variant, hard-coded to 64 lanes.
uint32_t getWarpSize() {
  constexpr uint32_t LanesPerWarp = 64;
  return LanesPerWarp;
}
90
91uint32_t getNumberOfWarpsInBlock() {
92 return mapping::getBlockSize() / mapping::getWarpSize();
93}
94
95#pragma omp end declare variant
96///}
97
98/// NVPTX Implementation
99///
100///{
101#pragma omp begin declare variant match( \
102 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
103
104LaneMaskTy activemask() {
105 unsigned int Mask;
106 asm("activemask.b32 %0;" : "=r"(Mask));
107 return Mask;
108}
109
110LaneMaskTy lanemaskLT() {
111 __kmpc_impl_lanemask_t Res;
112 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
113 return Res;
114}
115
116LaneMaskTy lanemaskGT() {
117 __kmpc_impl_lanemask_t Res;
118 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
119 return Res;
120}
121
122uint32_t getThreadIdInWarp() {
123 return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
124}
125
126uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
127
128uint32_t getBlockSize() {
129 return __nvvm_read_ptx_sreg_ntid_x() -
130 (!mapping::isSPMDMode() * mapping::getWarpSize());
131}
132
133uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); }
134
135uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
136
137uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
138
139uint32_t getNumberOfProcessorElements() {
140 return __nvvm_read_ptx_sreg_ntid_x();
141}
142
143uint32_t getWarpId() {
144 return mapping::getThreadIdInBlock() / mapping::getWarpSize();
145}
146
/// Return the warp size used by the nvptx variant, hard-coded to 32 lanes.
uint32_t getWarpSize() {
  constexpr uint32_t LanesPerWarp = 32;
  return LanesPerWarp;
}
148
149uint32_t getNumberOfWarpsInBlock() {
150 return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
151 mapping::getWarpSize();
152}
153
154#pragma omp end declare variant
155///}
156
157} // namespace impl
158} // namespace _OMP
159
160bool mapping::isMainThreadInGenericMode() {
161 if (mapping::isSPMDMode() || icv::Level)
162 return false;
163
164 // Check if this is the last warp in the block.
165 uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) &
166 ~(mapping::getWarpSize() - 1);
167 return mapping::getThreadIdInBlock() == MainTId;
168}
169
170bool mapping::isLeaderInWarp() {
171 __kmpc_impl_lanemask_t Active = mapping::activemask();
172 __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT();
173 return utils::popc(Active & LaneMaskLT) == 0;
174}
175
176LaneMaskTy mapping::activemask() { return impl::activemask(); }
177
178LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); }
179
180LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); }
181
182uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); }
183
184uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); }
185
186uint32_t mapping::getBlockSize() { return impl::getBlockSize(); }
187
188uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
189
190uint32_t mapping::getBlockId() { return impl::getBlockId(); }
191
192uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); }
193
194uint32_t mapping::getNumberOfProcessorElements() {
195 return impl::getNumberOfProcessorElements();
196}
197
198uint32_t mapping::getWarpId() { return impl::getWarpId(); }
199
200uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
201
202uint32_t mapping::getNumberOfWarpsInBlock() {
203 return impl::getNumberOfWarpsInBlock();
204}
205
206/// Execution mode
207///
208///{
209static int SHARED(IsSPMDMode);
210
211void mapping::init(bool IsSPMD) {
212 if (!mapping::getThreadIdInBlock())
213 IsSPMDMode = IsSPMD;
214}
215
216bool mapping::isSPMDMode() { return IsSPMDMode; }
217
218bool mapping::isGenericMode() { return !isSPMDMode(); }
219///}
220
221#pragma omp end declare target