| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef __OMPX_H |
| #define __OMPX_H |
| |
/// Default `width` used by the shuffle helpers in this header: the value of
/// __AMDGCN_WAVEFRONT_SIZE when that macro is defined, 32 otherwise.
#ifndef __AMDGCN_WAVEFRONT_SIZE
#define __WARP_SIZE 32
#else
#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
#endif
| |
/// 64-bit unsigned integer used for the `mask` parameters and the ballot
/// result in this header.
///
/// The compiler's own __UINT64_TYPE__ (predefined by Clang and GCC) is
/// preferred: it is the same type <stdint.h> uses, so both headers can be
/// included together, and it stays 64 bits wide on targets where
/// `unsigned long` is only 32 bits. The historic spelling is kept as a
/// fallback for compilers that do not predefine the macro.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif
| |
#if defined(__cplusplus)
extern "C" {
#endif

/// OpenMP runtime functions called by the host fallbacks of ompx_thread_id
/// and ompx_block_dim. Declared here directly; this header pulls in no other
/// header.
int omp_get_ancestor_thread_num(int level);
int omp_get_team_size(int level);

#if defined(__cplusplus)
}
#endif
| |
| /// Target kernel language extensions |
| /// |
| /// These extensions exist for the host to allow fallback implementations, |
| /// however, they cannot be arbitrarily composed with OpenMP. If the rules of |
| /// the kernel language are followed, the host fallbacks should behave as |
| /// expected since the kernel is represented as 3 sequential outer loops, one |
| /// for each grid dimension, and three (nested) parallel loops, one for each |
| /// block dimension. This fallback is not supposed to be optimal and should be |
| /// configurable by the user. |
| /// |
| ///{ |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
/// Memory orderings taken by the ompx synchronization functions. Every
/// enumerator has the value of the corresponding compiler __ATOMIC_* macro.
enum {
  ompx_relaxed = __ATOMIC_RELAXED,
  ompx_acquire = __ATOMIC_ACQUIRE,
  /// Misspelled original name of ompx_acquire; kept so existing code builds.
  ompx_aquire = ompx_acquire,
  ompx_release = __ATOMIC_RELEASE,
  ompx_acq_rel = __ATOMIC_ACQ_REL,
  ompx_seq_cst = __ATOMIC_SEQ_CST,
};
| |
/// Dimension selectors passed as `Dim` to the ompx_* grid queries.
enum {
  ompx_dim_x = 0,
  ompx_dim_y, // 1
  ompx_dim_z, // 2
};
| |
| // TODO: The following implementation is for host fallback. We need to disable |
| // generation of host fallback in kernel language mode. |
| #pragma omp begin declare variant match(device = {kind(cpu)}) |
| |
/// ompx_{thread_id,block_dim,block_id,grid_dim}
///
/// Host fallbacks of the grid queries. thread_id and block_dim forward
/// `Dim + 1` to omp_get_ancestor_thread_num and omp_get_team_size
/// respectively; block_id is always 0 and grid_dim is always 1.
///{
#define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(FN, EXPR) \
  static inline int ompx_##FN(int Dim) { return EXPR; }

_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(thread_id,
                                      omp_get_ancestor_thread_num(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_dim, omp_get_team_size(Dim + 1))
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(block_id, 0)
_TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)

#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C
///}
| |
| /// ompx_{sync_block}_{,divergent} |
| ///{ |
| #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY) \ |
| static inline RETTY ompx_##NAME(ARGS) { BODY; } |
| |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering, |
| _Pragma("omp barrier")); |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void, |
| ompx_sync_block(ompx_acq_rel)); |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering, |
| ompx_sync_block(Ordering)); |
| #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C |
| ///} |
| |
/// Host fallback of ompx_ballot_sync. It has no implementation: any call
/// traps.
static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
  // Neither argument is used; execution never continues past the trap.
  (void)mask;
  (void)pred;
  __builtin_trap();
}
| |
/// ompx_shfl_down_sync_{i,f,l,d}
///
/// Host fallbacks of the shuffle-down functions, one per element type
/// (int, float, long, double). None of them is implemented: any call traps.
///{
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(ELEM_T, SUFFIX) \
  static inline ELEM_T ompx_shfl_down_sync_##SUFFIX( \
      uint64_t mask, ELEM_T var, unsigned delta, int width) { \
    __builtin_trap(); \
  }

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
///}
| |
| #pragma omp end declare variant |
| |
/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Declarations of the block synchronization functions.
///{
// The macro supplies the terminating ';' itself, so the invocations below
// carry none; adding one would leave an empty declaration at file scope.
#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
  RETTY ompx_##NAME(ARGS);

_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
///}
| |
| /// ompx_{thread,block}_{id,dim}_{x,y,z} |
| ///{ |
| #define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \ |
| int ompx_##NAME(int Dim); \ |
| static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); } \ |
| static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); } \ |
| static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); } |
| |
| _TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id) |
| _TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim) |
| _TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id) |
| _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim) |
| #undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C |
| ///} |
| |
/// Declaration of ompx_ballot_sync: takes a 64-bit `mask` and an integer
/// predicate `pred`, and returns a 64-bit value.
extern uint64_t ompx_ballot_sync(uint64_t mask, int pred);
| |
/// ompx_shfl_down_sync_{i,f,l,d}
///
/// Declarations of the shuffle-down functions, one per element type
/// (int, float, long, double). In C++ `width` defaults to __WARP_SIZE; C has
/// no default arguments, so C callers pass `width` explicitly.
///{
#ifdef __cplusplus
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_WIDTH int width = __WARP_SIZE
#else
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_WIDTH int width
#endif

#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
  TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
                                _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_WIDTH);

_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)

#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_WIDTH
///}
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| #ifdef __cplusplus |
| |
| namespace ompx { |
| |
| enum { |
| dim_x = ompx_dim_x, |
| dim_y = ompx_dim_y, |
| dim_z = ompx_dim_z, |
| }; |
| |
| enum { |
| relaxed = ompx_relaxed , |
| aquire = ompx_aquire, |
| release = ompx_release, |
| acc_rel = ompx_acq_rel, |
| seq_cst = ompx_seq_cst, |
| }; |
| |
| /// ompx::{thread,block}_{id,dim}_{,x,y,z} |
| ///{ |
| #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \ |
| static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); } \ |
| static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); } \ |
| static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); } \ |
| static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); } |
| |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id) |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim) |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id) |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim) |
| #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX |
| ///} |
| |
| /// ompx_{sync_block}_{,divergent} |
| ///{ |
| #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \ |
| static inline RETTY NAME(ARGS) { \ |
| return ompx_##NAME(CALL_ARGS); \ |
| } |
| |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel, |
| Ordering); |
| _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent, |
| int Ordering = acc_rel, Ordering); |
| #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX |
| ///} |
| |
| static inline uint64_t ballot_sync(uint64_t mask, int pred) { |
| return ompx_ballot_sync(mask, pred); |
| } |
| |
| /// shfl_down_sync |
| ///{ |
| #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \ |
| static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \ |
| int width = __WARP_SIZE) { \ |
| return ompx_shfl_down_sync_##TY(mask, var, delta, width); \ |
| } |
| |
| _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i) |
| _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f) |
| _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l) |
| _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d) |
| |
| #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC |
| ///} |
| |
| } // namespace ompx |
| #endif |
| |
| ///} |
| |
| #endif /* __OMPX_H */ |