[openmp] Use llvm GridValues from devicertl
Add the LLVM include path to the CMake files and set the target_impl enums
from the LLVM constants instead of copying the values.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D108391
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 148dad2..75efec3 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -21,6 +21,12 @@
return()
endif()
+if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS)
+ libomptarget_say("Not building device RTL: Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS")
+ return()
+endif()
+
+
# Check if we can create an LLVM bitcode implementation of the runtime library
# that could be inlined in the user application. For that we need to find
# a Clang compiler capable of compiling our CUDA files to LLVM bitcode and
@@ -132,6 +138,10 @@
set(clang_opt_flags -O1 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=2048)
set(link_opt_flags -O1 -openmp-opt-disable)
+# Prepend -I to each LLVM include directory so the list can be added to bc_flags
+set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
+list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
+
# Set flags for LLVM Bitcode compilation.
set(bc_flags -S -x c++ -std=c++17
${clang_opt_flags}
@@ -141,6 +151,7 @@
-fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-Xclang -target-feature -Xclang +ptx61
-I${include_directory}
+ ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
)
if(${LIBOMPTARGET_DEVICE_DEBUG})
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index fc3ca63..3bd0553 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -16,6 +16,8 @@
#pragma omp declare target
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
+
using namespace _OMP;
namespace _OMP {
@@ -26,6 +28,10 @@
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
+constexpr const llvm::omp::GV &getGridValue() {
+ return llvm::omp::AMDGPUGridValues;
+}
+
uint32_t getGridDim(uint32_t n, uint16_t d) {
uint32_t q = n / d;
return q + (n > q * d);
@@ -86,8 +92,6 @@
return mapping::getThreadIdInBlock() / mapping::getWarpSize();
}
-uint32_t getWarpSize() { return 64; }
-
uint32_t getNumberOfWarpsInBlock() {
return mapping::getBlockSize() / mapping::getWarpSize();
}
@@ -101,6 +105,10 @@
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+constexpr const llvm::omp::GV &getGridValue() {
+ return llvm::omp::NVPTXGridValues;
+}
+
LaneMaskTy activemask() {
unsigned int Mask;
asm("activemask.b32 %0;" : "=r"(Mask));
@@ -144,8 +152,6 @@
return mapping::getThreadIdInBlock() / mapping::getWarpSize();
}
-uint32_t getWarpSize() { return 32; }
-
uint32_t getNumberOfWarpsInBlock() {
return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
mapping::getWarpSize();
@@ -154,6 +160,8 @@
#pragma omp end declare variant
///}
+uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
+
} // namespace impl
} // namespace _OMP