[Libomptarget] Remove remaining inline assembly from the device RTL (#79922)
Summary:
Recent patches have added some missing intrinsic functions for NVPTX. This
patch gets rid of all the remaining uses of inline assembly. The one
change that wasn't directly replaced with a built-in was the `pack` and
`unpack` implementations. However, the generic C implementation produces
equivalent SASS output when run through PTXAS.
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 822b8dc..31dd805 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -154,23 +154,11 @@
const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
-LaneMaskTy activemask() {
- unsigned int Mask;
- asm("activemask.b32 %0;" : "=r"(Mask));
- return Mask;
-}
+LaneMaskTy activemask() { return __nvvm_activemask(); }
-LaneMaskTy lanemaskLT() {
- __kmpc_impl_lanemask_t Res;
- asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res));
- return Res;
-}
+LaneMaskTy lanemaskLT() { return __nvvm_read_ptx_sreg_lanemask_lt(); }
-LaneMaskTy lanemaskGT() {
- __kmpc_impl_lanemask_t Res;
- asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res));
- return Res;
-}
+LaneMaskTy lanemaskGT() { return __nvvm_read_ptx_sreg_lanemask_gt(); }
uint32_t getThreadIdInBlock(int32_t Dim) {
switch (Dim) {