[libc++] Fixes std::to_chars for bases != 10.
While working on D70631, Microsoft's unit tests discovered an issue.
Our `std::to_chars` implementation for bases != 10 uses the range
`[first,last)` as temporary buffer. This violates the contract for
to_chars:
[charconv.to.chars]/1 http://eel.is/c++draft/charconv#to.chars-1
`to_chars_result to_chars(char* first, char* last, see below value, int base = 10);`
"If the member ec of the return value is such that the value is equal to
the value of a value-initialized errc, the conversion was successful and
the member ptr is the one-past-the-end pointer of the characters
written."
Our implementation modifies the range `[member ptr, last)`, which causes
Microsoft's test to fail. Their test verifies the buffer
`[member ptr, last)` is unchanged. (The test is only done when the
conversion is successful.)
While looking at the code I noticed the performance for bases != 10 also
is suboptimal. This is tracked in D97705.
This patch fixes the issue and adds a benchmark. This benchmark will be
used as baseline for D97705.
Reviewed By: #libc, Quuxplusone, zoecarver
Differential Revision: https://reviews.llvm.org/D100722
diff --git a/libcxx/benchmarks/to_chars.bench.cpp b/libcxx/benchmarks/to_chars.bench.cpp
new file mode 100644
index 0000000..1a3dc64
--- /dev/null
+++ b/libcxx/benchmarks/to_chars.bench.cpp
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <array>
+#include <charconv>
+#include <random>
+
+#include "benchmark/benchmark.h"
+#include "test_macros.h"
+
+static const std::array<unsigned, 1000> input = [] {
+ std::mt19937 generator;
+ std::uniform_int_distribution<unsigned> distribution(0, std::numeric_limits<unsigned>::max());
+ std::array<unsigned, 1000> result;
+ std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
+ return result;
+}();
+
+static void BM_to_chars_good(benchmark::State& state) {
+ char buffer[128];
+ int base = state.range(0);
+ while (state.KeepRunningBatch(input.size()))
+ for (auto value : input)
+ benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[128], value, base));
+}
+BENCHMARK(BM_to_chars_good)->DenseRange(2, 36, 1);
+
+static void BM_to_chars_bad(benchmark::State& state) {
+ char buffer[128];
+ int base = state.range(0);
+ struct sample {
+ unsigned size;
+ unsigned value;
+ };
+ std::array<sample, 1000> data;
+ // Assume the failure occurs, on average, halfway during the conversion.
+ std::transform(input.begin(), input.end(), data.begin(), [&](unsigned value) {
+ std::to_chars_result result = std::to_chars(buffer, &buffer[128], value, base);
+ return sample{unsigned((result.ptr - buffer) / 2), value};
+ });
+
+ while (state.KeepRunningBatch(data.size()))
+ for (auto element : data)
+ benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[element.size], element.value, base));
+}
+BENCHMARK(BM_to_chars_bad)->DenseRange(2, 36, 1);
+
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+ if (benchmark::ReportUnrecognizedArguments(argc, argv))
+ return 1;
+
+ benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/libcxx/include/charconv b/libcxx/include/charconv
index 8113552..64a2d03 100644
--- a/libcxx/include/charconv
+++ b/libcxx/include/charconv
@@ -79,6 +79,7 @@
#include <__utility/to_underlying.h>
#include <cmath> // for log2f
#include <cstdint>
+#include <cstdlib> // for _LIBCPP_UNREACHABLE
#include <cstring>
#include <limits>
#include <type_traits>
@@ -401,32 +402,53 @@
}
template <typename _Tp>
+_LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_INLINE_VISIBILITY int __to_chars_integral_width(_Tp __value, unsigned __base) {
+ _LIBCPP_ASSERT(__value >= 0, "The function requires a non-negative value.");
+
+ unsigned __base_2 = __base * __base;
+ unsigned __base_3 = __base_2 * __base;
+ unsigned __base_4 = __base_2 * __base_2;
+
+ int __r = 0;
+ while (true) {
+ if (__value < __base)
+ return __r + 1;
+ if (__value < __base_2)
+ return __r + 2;
+ if (__value < __base_3)
+ return __r + 3;
+ if (__value < __base_4)
+ return __r + 4;
+
+ __value /= __base_4;
+ __r += 4;
+ }
+
+ _LIBCPP_UNREACHABLE();
+}
+
+template <typename _Tp>
_LIBCPP_AVAILABILITY_TO_CHARS
inline _LIBCPP_INLINE_VISIBILITY to_chars_result
__to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
false_type)
{
- if (__base == 10)
- return __to_chars_itoa(__first, __last, __value, false_type());
+ if (__base == 10)
+ return __to_chars_itoa(__first, __last, __value, false_type());
- auto __p = __last;
- while (__p != __first)
- {
- auto __c = __value % __base;
- __value /= __base;
- *--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
- if (__value == 0)
- break;
- }
+ ptrdiff_t __cap = __last - __first;
+ int __n = __to_chars_integral_width(__value, __base);
+ if (__n > __cap)
+ return {__last, errc::value_too_large};
- auto __len = __last - __p;
- if (__value != 0 || !__len)
- return {__last, errc::value_too_large};
- else
- {
- _VSTD::memmove(__first, __p, __len);
- return {__first + __len, {}};
- }
+ __last = __first + __n;
+ char* __p = __last;
+ do {
+ unsigned __c = __value % __base;
+ __value /= __base;
+ *--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
+ } while (__value != 0);
+ return {__last, errc(0)};
}
template <typename _Tp, typename enable_if<is_integral<_Tp>::value, int>::type = 0>
diff --git a/libcxx/test/support/charconv_test_helpers.h b/libcxx/test/support/charconv_test_helpers.h
index 51e6a56..bbce485 100644
--- a/libcxx/test/support/charconv_test_helpers.h
+++ b/libcxx/test/support/charconv_test_helpers.h
@@ -12,6 +12,7 @@
#include <charconv>
#include <cassert>
#include <limits>
+#include <numeric>
#include <string.h>
#include <stdlib.h>
@@ -105,8 +106,13 @@
using std::to_chars;
std::to_chars_result r;
+ // Poison the buffer for testing whether a successful std::to_chars
+ // doesn't modify data beyond r.ptr.
+ std::iota(buf, buf + sizeof(buf), 1);
r = to_chars(buf, buf + sizeof(buf), v, args...);
assert(r.ec == std::errc{});
+ for (size_t i = r.ptr - buf; i < sizeof(buf); ++i)
+ assert(buf[i] == static_cast<char>(i + 1));
*r.ptr = '\0';
auto a = fromchars(buf, r.ptr, args...);