[OpenMP] Added the support for cache line size 256 for A64FX

The Fugaku supercomputer is built with the Fujitsu A64FX microprocessor, whose cache line size is 256 bytes. Currently, libomp only supports a cache line size of 128 bytes for PPC64 and 64 bytes for everything else. This patch adds support for a 256-byte cache line on A64FX. It's worth noting that although A64FX is a variant of AArch64, this property is not shared by other AArch64 processors. As a result, following the UCX source code (https://github.com/openucx/ucx/blob/392443ab92626412605dee1572056f79c897c6c3/src/ucs/arch/aarch64/cpu.c#L17), the only way to determine the cache line size is to check whether the CPU is a FUJITSU A64FX.

Reviewed By: jdoerfert, Hahnfeld

Differential Revision: https://reviews.llvm.org/D93169
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index e24528e..6d8a539 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -66,7 +66,18 @@
   endif ()
   set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
 endif()
-libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 mic mips mips64 riscv64)
+
+# FUJITSU A64FX is a special processor because its cache line size is 256 bytes.
+# We need to pass this information into kmp_config.h.
+if(LIBOMP_ARCH STREQUAL "aarch64")
+  libomp_is_aarch64_a64fx(LIBOMP_DETECT_AARCH64_A64FX)
+  if (LIBOMP_DETECT_AARCH64_A64FX)
+    set(LIBOMP_ARCH "aarch64_a64fx")
+    set(LIBOMP_ARCH_AARCH64_A64FX TRUE)
+  endif()
+endif()
+
+libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64)
 
 set(LIBOMP_LIB_TYPE normal CACHE STRING
   "Performance,Profiling,Stubs library (normal/profile/stubs)")
@@ -136,6 +147,7 @@
 set(INTEL64 FALSE)
 set(ARM FALSE)
 set(AARCH64 FALSE)
+set(AARCH64_A64FX FALSE)
 set(PPC64BE FALSE)
 set(PPC64LE FALSE)
 set(PPC64 FALSE)
@@ -157,6 +169,8 @@
   set(PPC64 TRUE)
 elseif("${LIBOMP_ARCH}" STREQUAL "aarch64") # AARCH64 architecture
   set(AARCH64 TRUE)
+elseif("${LIBOMP_ARCH}" STREQUAL "aarch64_a64fx") # AARCH64_A64FX architecture
+  set(AARCH64_A64FX TRUE)
 elseif("${LIBOMP_ARCH}" STREQUAL "mic") # Intel(R) Many Integrated Core Architecture
   set(MIC TRUE)
 elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture