[flang] Speed common runtime cases of DOT_PRODUCT & MATMUL

Look for contiguous numeric argument arrays at runtime and
use specialized code for them.

Differential Revision: https://reviews.llvm.org/D112239
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index ee2641b..3e0a68b 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -334,5 +334,12 @@
   return std::nullopt;
 }
 
+// Type in which results are accumulated: Real and Complex have their kind
+// raised to at least sizeof(double); other categories keep KIND unchanged.
+template <TypeCategory CAT, int KIND>
+using AccumulationType = CppTypeFor<CAT,
+    (CAT != TypeCategory::Real && CAT != TypeCategory::Complex) ? KIND
+        : std::max(KIND, static_cast<int>(sizeof(double)))>;
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_TOOLS_H_