Skip to content

Commit 4e80bc7

Browse files
authored
[Clang] Introduce scoped variants of GNU atomic functions (#72280)
Summary: The standard GNU atomic operations are a very common way to target hardware atomics on the device. With more heterogeneous devices being introduced, the concept of memory scopes has been in the LLVM language for a while via the `syncscope` modifier. For targets such as the GPU, this can change code generation depending on whether memory ordering needs to be consistent across the entire system, a single GPU device, or a narrower scope. Previously these scopes were only exported via the `opencl` and `hip` variants of these functions. However, this made them difficult to use outside of those languages, and the semantics were different from the standard GNU versions. This patch introduces a `__scoped_atomic` variant for the common functions. There was some discussion over whether these should be overloads of the existing ones or simply new variants. I leant towards new variants to be less disruptive. The scope here can be one of the following:
```
__MEMORY_SCOPE_SYSTEM // All devices and systems
__MEMORY_SCOPE_DEVICE // Just this device
__MEMORY_SCOPE_WRKGRP // A 'work-group' AKA CUDA block
__MEMORY_SCOPE_WVFRNT // A 'wavefront' AKA CUDA warp
__MEMORY_SCOPE_SINGLE // A single thread.
```
Naming consistency was attempted, but it is difficult to capture the full spectrum with so many names. Suggestions appreciated.
1 parent 32d5351 commit 4e80bc7

File tree

15 files changed

+788
-21
lines changed

15 files changed

+788
-21
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3867,6 +3867,30 @@ builtin function, and are named with a ``__opencl_`` prefix. The macros
38673867
and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
38683868
corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
38693869
3870+
__scoped_atomic builtins
3871+
------------------------
3872+
3873+
Clang provides a set of atomics taking a memory scope argument. These atomics
3874+
are identical to the standard GNU / GCC atomic builtins but take an extra
3875+
memory scope argument. These are designed to be a generic alternative to the
3876+
``__opencl_atomic_*`` builtin functions for targets that support atomic memory
3877+
scopes.
3878+
3879+
Atomic memory scopes are designed to assist optimizations for systems with
3880+
several levels of memory hierarchy like GPUs. The following memory scopes are
3881+
currently supported:
3882+
3883+
* ``__MEMORY_SCOPE_SYSTEM``
3884+
* ``__MEMORY_SCOPE_DEVICE``
3885+
* ``__MEMORY_SCOPE_WRKGRP``
3886+
* ``__MEMORY_SCOPE_WVFRNT``
3887+
* ``__MEMORY_SCOPE_SINGLE``
3888+
3889+
This controls whether or not the atomic operation is ordered with respect to the
3890+
whole system, the current device, an OpenCL workgroup, wavefront, or just a
3891+
single thread. If these are used on a target that does not support atomic
3892+
scopes, then they will behave exactly as the standard GNU atomic builtins.
3893+
38703894
Low-level ARM exclusive memory builtins
38713895
---------------------------------------
38723896

clang/include/clang/AST/Expr.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6498,7 +6498,7 @@ class AtomicExpr : public Expr {
64986498
return cast<Expr>(SubExprs[ORDER_FAIL]);
64996499
}
65006500
Expr *getVal2() const {
6501-
if (Op == AO__atomic_exchange)
6501+
if (Op == AO__atomic_exchange || Op == AO__scoped_atomic_exchange)
65026502
return cast<Expr>(SubExprs[ORDER_FAIL]);
65036503
assert(NumSubExprs > VAL2);
65046504
return cast<Expr>(SubExprs[VAL2]);
@@ -6539,7 +6539,9 @@ class AtomicExpr : public Expr {
65396539
getOp() == AO__opencl_atomic_compare_exchange_weak ||
65406540
getOp() == AO__hip_atomic_compare_exchange_weak ||
65416541
getOp() == AO__atomic_compare_exchange ||
6542-
getOp() == AO__atomic_compare_exchange_n;
6542+
getOp() == AO__atomic_compare_exchange_n ||
6543+
getOp() == AO__scoped_atomic_compare_exchange ||
6544+
getOp() == AO__scoped_atomic_compare_exchange_n;
65436545
}
65446546

65456547
bool isOpenCL() const {
@@ -6569,13 +6571,13 @@ class AtomicExpr : public Expr {
65696571
/// \return empty atomic scope model if the atomic op code does not have
65706572
/// scope operand.
65716573
static std::unique_ptr<AtomicScopeModel> getScopeModel(AtomicOp Op) {
6572-
auto Kind =
6573-
(Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
6574-
? AtomicScopeModelKind::OpenCL
6575-
: (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
6576-
? AtomicScopeModelKind::HIP
6577-
: AtomicScopeModelKind::None;
6578-
return AtomicScopeModel::create(Kind);
6574+
if (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
6575+
return AtomicScopeModel::create(AtomicScopeModelKind::OpenCL);
6576+
else if (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
6577+
return AtomicScopeModel::create(AtomicScopeModelKind::HIP);
6578+
else if (Op >= AO__scoped_atomic_load && Op <= AO__scoped_atomic_fetch_max)
6579+
return AtomicScopeModel::create(AtomicScopeModelKind::Generic);
6580+
return AtomicScopeModel::create(AtomicScopeModelKind::None);
65796581
}
65806582

65816583
/// Get atomic scope model.

clang/include/clang/Basic/Builtins.def

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,32 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
904904
BUILTIN(__atomic_always_lock_free, "bzvCD*", "nE")
905905
BUILTIN(__atomic_is_lock_free, "bzvCD*", "nE")
906906

907+
// GNU atomic builtins with atomic scopes.
908+
ATOMIC_BUILTIN(__scoped_atomic_load, "v.", "t")
909+
ATOMIC_BUILTIN(__scoped_atomic_load_n, "v.", "t")
910+
ATOMIC_BUILTIN(__scoped_atomic_store, "v.", "t")
911+
ATOMIC_BUILTIN(__scoped_atomic_store_n, "v.", "t")
912+
ATOMIC_BUILTIN(__scoped_atomic_exchange, "v.", "t")
913+
ATOMIC_BUILTIN(__scoped_atomic_exchange_n, "v.", "t")
914+
ATOMIC_BUILTIN(__scoped_atomic_compare_exchange, "v.", "t")
915+
ATOMIC_BUILTIN(__scoped_atomic_compare_exchange_n, "v.", "t")
916+
ATOMIC_BUILTIN(__scoped_atomic_fetch_add, "v.", "t")
917+
ATOMIC_BUILTIN(__scoped_atomic_fetch_sub, "v.", "t")
918+
ATOMIC_BUILTIN(__scoped_atomic_fetch_and, "v.", "t")
919+
ATOMIC_BUILTIN(__scoped_atomic_fetch_or, "v.", "t")
920+
ATOMIC_BUILTIN(__scoped_atomic_fetch_xor, "v.", "t")
921+
ATOMIC_BUILTIN(__scoped_atomic_fetch_nand, "v.", "t")
922+
ATOMIC_BUILTIN(__scoped_atomic_add_fetch, "v.", "t")
923+
ATOMIC_BUILTIN(__scoped_atomic_sub_fetch, "v.", "t")
924+
ATOMIC_BUILTIN(__scoped_atomic_and_fetch, "v.", "t")
925+
ATOMIC_BUILTIN(__scoped_atomic_or_fetch, "v.", "t")
926+
ATOMIC_BUILTIN(__scoped_atomic_xor_fetch, "v.", "t")
927+
ATOMIC_BUILTIN(__scoped_atomic_max_fetch, "v.", "t")
928+
ATOMIC_BUILTIN(__scoped_atomic_min_fetch, "v.", "t")
929+
ATOMIC_BUILTIN(__scoped_atomic_nand_fetch, "v.", "t")
930+
ATOMIC_BUILTIN(__scoped_atomic_fetch_min, "v.", "t")
931+
ATOMIC_BUILTIN(__scoped_atomic_fetch_max, "v.", "t")
932+
907933
// OpenCL 2.0 atomic builtins.
908934
ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
909935
ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")

clang/include/clang/Basic/SyncScope.h

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ namespace clang {
4040
/// Update getAsString.
4141
///
4242
enum class SyncScope {
43+
SystemScope,
44+
DeviceScope,
45+
WorkgroupScope,
46+
WavefrontScope,
47+
SingleScope,
4348
HIPSingleThread,
4449
HIPWavefront,
4550
HIPWorkgroup,
@@ -54,6 +59,16 @@ enum class SyncScope {
5459

5560
inline llvm::StringRef getAsString(SyncScope S) {
5661
switch (S) {
62+
case SyncScope::SystemScope:
63+
return "system_scope";
64+
case SyncScope::DeviceScope:
65+
return "device_scope";
66+
case SyncScope::WorkgroupScope:
67+
return "workgroup_scope";
68+
case SyncScope::WavefrontScope:
69+
return "wavefront_scope";
70+
case SyncScope::SingleScope:
71+
return "single_scope";
5772
case SyncScope::HIPSingleThread:
5873
return "hip_singlethread";
5974
case SyncScope::HIPWavefront:
@@ -77,7 +92,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
7792
}
7893

7994
/// Defines the kind of atomic scope models.
80-
enum class AtomicScopeModelKind { None, OpenCL, HIP };
95+
enum class AtomicScopeModelKind { None, OpenCL, HIP, Generic };
8196

8297
/// Defines the interface for synch scope model.
8398
class AtomicScopeModel {
@@ -205,6 +220,56 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
205220
}
206221
};
207222

223+
/// Defines the generic atomic scope model.
224+
class AtomicScopeGenericModel : public AtomicScopeModel {
225+
public:
226+
/// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
227+
enum ID {
228+
System = 0,
229+
Device = 1,
230+
Workgroup = 2,
231+
Wavefront = 3,
232+
Single = 4,
233+
Last = Single
234+
};
235+
236+
AtomicScopeGenericModel() = default;
237+
238+
SyncScope map(unsigned S) const override {
239+
switch (static_cast<ID>(S)) {
240+
case Device:
241+
return SyncScope::DeviceScope;
242+
case System:
243+
return SyncScope::SystemScope;
244+
case Workgroup:
245+
return SyncScope::WorkgroupScope;
246+
case Wavefront:
247+
return SyncScope::WavefrontScope;
248+
case Single:
249+
return SyncScope::SingleScope;
250+
}
251+
llvm_unreachable("Invalid language sync scope value");
252+
}
253+
254+
bool isValid(unsigned S) const override {
255+
return S >= static_cast<unsigned>(System) &&
256+
S <= static_cast<unsigned>(Last);
257+
}
258+
259+
ArrayRef<unsigned> getRuntimeValues() const override {
260+
static_assert(Last == Single, "Does not include all sync scopes");
261+
static const unsigned Scopes[] = {
262+
static_cast<unsigned>(Device), static_cast<unsigned>(System),
263+
static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
264+
static_cast<unsigned>(Single)};
265+
return llvm::ArrayRef(Scopes);
266+
}
267+
268+
unsigned getFallBackValue() const override {
269+
return static_cast<unsigned>(System);
270+
}
271+
};
272+
208273
inline std::unique_ptr<AtomicScopeModel>
209274
AtomicScopeModel::create(AtomicScopeModelKind K) {
210275
switch (K) {
@@ -214,6 +279,8 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
214279
return std::make_unique<AtomicScopeOpenCLModel>();
215280
case AtomicScopeModelKind::HIP:
216281
return std::make_unique<AtomicScopeHIPModel>();
282+
case AtomicScopeModelKind::Generic:
283+
return std::make_unique<AtomicScopeGenericModel>();
217284
}
218285
llvm_unreachable("Invalid atomic scope model kind");
219286
}

clang/lib/AST/Expr.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4887,6 +4887,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
48874887
case AO__atomic_load_n:
48884888
return 2;
48894889

4890+
case AO__scoped_atomic_load_n:
48904891
case AO__opencl_atomic_load:
48914892
case AO__hip_atomic_load:
48924893
case AO__c11_atomic_store:
@@ -4921,6 +4922,26 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
49214922
case AO__atomic_fetch_max:
49224923
return 3;
49234924

4925+
case AO__scoped_atomic_load:
4926+
case AO__scoped_atomic_store:
4927+
case AO__scoped_atomic_store_n:
4928+
case AO__scoped_atomic_fetch_add:
4929+
case AO__scoped_atomic_fetch_sub:
4930+
case AO__scoped_atomic_fetch_and:
4931+
case AO__scoped_atomic_fetch_or:
4932+
case AO__scoped_atomic_fetch_xor:
4933+
case AO__scoped_atomic_fetch_nand:
4934+
case AO__scoped_atomic_add_fetch:
4935+
case AO__scoped_atomic_sub_fetch:
4936+
case AO__scoped_atomic_and_fetch:
4937+
case AO__scoped_atomic_or_fetch:
4938+
case AO__scoped_atomic_xor_fetch:
4939+
case AO__scoped_atomic_nand_fetch:
4940+
case AO__scoped_atomic_min_fetch:
4941+
case AO__scoped_atomic_max_fetch:
4942+
case AO__scoped_atomic_fetch_min:
4943+
case AO__scoped_atomic_fetch_max:
4944+
case AO__scoped_atomic_exchange_n:
49244945
case AO__hip_atomic_exchange:
49254946
case AO__hip_atomic_fetch_add:
49264947
case AO__hip_atomic_fetch_sub:
@@ -4942,6 +4963,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
49424963
case AO__atomic_exchange:
49434964
return 4;
49444965

4966+
case AO__scoped_atomic_exchange:
49454967
case AO__c11_atomic_compare_exchange_strong:
49464968
case AO__c11_atomic_compare_exchange_weak:
49474969
return 5;
@@ -4952,6 +4974,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
49524974
case AO__atomic_compare_exchange:
49534975
case AO__atomic_compare_exchange_n:
49544976
return 6;
4977+
4978+
case AO__scoped_atomic_compare_exchange:
4979+
case AO__scoped_atomic_compare_exchange_n:
4980+
return 7;
49554981
}
49564982
llvm_unreachable("unknown atomic op");
49574983
}

clang/lib/AST/StmtPrinter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,7 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
18411841
PrintExpr(Node->getPtr());
18421842
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
18431843
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
1844+
Node->getOp() != AtomicExpr::AO__scoped_atomic_load_n &&
18441845
Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
18451846
Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
18461847
OS << ", ";

0 commit comments

Comments
 (0)