[Libomptarget] Address existing warnings in the device runtime library
This patch attempts to address the current warnings in the OpenMP
offloading device runtime. Previously we did not see these because we
compiled the runtime without the standard warning flags enabled.
However, these warnings are used when we now build the static library
version of this runtime. This became extremely noisy when coupled with
the fact that we compile each file roughly 32 times when all the
architectures are considered. So it would be ideal to not have all these
warnings show up when building.
Most of these errors were simply implicit switch-case fallthroughs,
which can be addressed using C++17's fallthrough attribute. Additionally
there was a volatile variable whose qualifier was being cast away. This
is most likely safe to remove because we cast the qualifier away before
the variable is even used, and removing it didn't seem to affect
anything in testing.
Depends on D125260
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D125339
diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
index e97c77d..45e08fa 100644
--- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -38,7 +38,7 @@
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
int32_t vprintf(const char *, void *);
namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return vprintf(Format, Arguments);
}
} // namespace impl
@@ -47,7 +47,7 @@
// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
#pragma omp begin declare variant match(device = {arch(amdgcn)})
namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
return -1;
}
} // namespace impl
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 48ca13a..12ef58f 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -46,7 +46,7 @@
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-static const llvm::omp::GV &getGridValue() {
+const llvm::omp::GV &getGridValue() {
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
}
@@ -121,9 +121,7 @@
return __nvvm_read_ptx_sreg_ntid_x();
}
-static const llvm::omp::GV &getGridValue() {
- return llvm::omp::NVPTXGridValues;
-}
+const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
LaneMaskTy activemask() {
unsigned int Mask;
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index fd419b8..a4c73bd 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -158,52 +158,52 @@
break;
case 16:
GlobalArgs[15] = args[15];
- // FALLTHROUGH
+ [[fallthrough]];
case 15:
GlobalArgs[14] = args[14];
- // FALLTHROUGH
+ [[fallthrough]];
case 14:
GlobalArgs[13] = args[13];
- // FALLTHROUGH
+ [[fallthrough]];
case 13:
GlobalArgs[12] = args[12];
- // FALLTHROUGH
+ [[fallthrough]];
case 12:
GlobalArgs[11] = args[11];
- // FALLTHROUGH
+ [[fallthrough]];
case 11:
GlobalArgs[10] = args[10];
- // FALLTHROUGH
+ [[fallthrough]];
case 10:
GlobalArgs[9] = args[9];
- // FALLTHROUGH
+ [[fallthrough]];
case 9:
GlobalArgs[8] = args[8];
- // FALLTHROUGH
+ [[fallthrough]];
case 8:
GlobalArgs[7] = args[7];
- // FALLTHROUGH
+ [[fallthrough]];
case 7:
GlobalArgs[6] = args[6];
- // FALLTHROUGH
+ [[fallthrough]];
case 6:
GlobalArgs[5] = args[5];
- // FALLTHROUGH
+ [[fallthrough]];
case 5:
GlobalArgs[4] = args[4];
- // FALLTHROUGH
+ [[fallthrough]];
case 4:
GlobalArgs[3] = args[3];
- // FALLTHROUGH
+ [[fallthrough]];
case 3:
GlobalArgs[2] = args[2];
- // FALLTHROUGH
+ [[fallthrough]];
case 2:
GlobalArgs[1] = args[1];
- // FALLTHROUGH
+ [[fallthrough]];
case 1:
GlobalArgs[0] = args[0];
- // FALLTHROUGH
+ [[fallthrough]];
case 0:
break;
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
index 516da6b..e65cdd3 100644
--- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -167,8 +167,8 @@
uint32_t kmpcMin(uint32_t x, uint32_t y) { return x < y ? x : y; }
-static volatile uint32_t IterCnt = 0;
-static volatile uint32_t Cnt = 0;
+static uint32_t IterCnt = 0;
+static uint32_t Cnt = 0;
} // namespace
@@ -211,7 +211,7 @@
// to the number of slots in the buffer.
bool IsMaster = (ThreadId == 0);
while (IsMaster) {
- Bound = atomic::load((uint32_t *)&IterCnt, __ATOMIC_SEQ_CST);
+ Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
if (TeamId < Bound + num_of_records)
break;
}
@@ -228,8 +228,7 @@
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
- ChunkTeamCount =
- atomic::inc((uint32_t *)&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
+ ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
}
// Synchronize
if (mapping::isSPMDMode())
@@ -305,8 +304,7 @@
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
- atomic::add((uint32_t *)&IterCnt, uint32_t(num_of_records),
- __ATOMIC_SEQ_CST);
+ atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
}
return 0;
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 685c697..f299aa0 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -298,14 +298,8 @@
return ThreadStates[TId]->ICVState.*Var;
}
-uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
- uint32_t TId = mapping::getThreadIdInBlock();
- if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
- return ThreadStates[TId]->ICVState.*Var;
- return TeamState.ICVState.*Var;
-}
-uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
- uint64_t TId = mapping::getThreadIdInBlock();
+template <typename IntTy> IntTy &lookupImpl(IntTy ICVStateTy::*Var) {
+ IntTy TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
return ThreadStates[TId]->ICVState.*Var;
return TeamState.ICVState.*Var;
@@ -330,27 +324,27 @@
switch (Kind) {
case state::VK_NThreads:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::NThreadsVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::NThreadsVar);
return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
case state::VK_Level:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::LevelVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::LevelVar);
return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
case state::VK_ActiveLevel:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::ActiveLevelVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::ActiveLevelVar);
return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
case state::VK_MaxActiveLevels:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::MaxActiveLevelsVar);
return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
case state::VK_RunSched:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::RunSchedVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::RunSchedVar);
return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
case state::VK_RunSchedChunk:
if (IsReadonly)
- return lookup32Impl(&ICVStateTy::RunSchedChunkVar);
+ return lookupImpl<uint32_t>(&ICVStateTy::RunSchedChunkVar);
return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
case state::VK_ParallelTeamSize:
return TeamState.ParallelTeamSize;
diff --git a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
index 81b3f6c..d168f21 100644
--- a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -139,6 +139,7 @@
numberOfActiveOMPThreads);
break;
}
+ [[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_balanced_chunk: {
if (chunk > 0) {
@@ -157,6 +158,7 @@
ub = oldUb;
break;
}
+ [[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
@@ -168,8 +170,9 @@
ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
break;
- } // note: if chunk <=0, use nochunk
- }
+ }
+ [[fallthrough]];
+ } // note: if chunk <=0, use nochunk
case kmp_sched_distr_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
@@ -341,7 +344,7 @@
uint32_t change = utils::popc(active);
__kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
unsigned int rank = utils::popc(active & lane_mask_lt);
- uint64_t warp_res;
+ uint64_t warp_res = 0;
if (rank == 0) {
warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
}