[libc++] Further improve the contiguous-iterator story, and fix some bugs.

- Quality-of-implementation: Avoid calling __unwrap_iter in constexpr contexts.
    The user might conceivably write a contiguous iterator where normal iterator
    arithmetic is constexpr-friendly but `std::to_address(it)` isn't.

- Bugfix: When you pass contiguous iterators to `std::copy`, you should get
    back your contiguous iterator type, not a raw pointer. That means that
    libc++ can't `__unwrap_iter` unless it also does `__rewrap_iter`.
    Fortunately, this is implementable.

- Improve test coverage of the new `contiguous_iterator` test iterator.
    This catches the bug described above.

- Tests: Stop testing that we can `std::copy` *into* an `input_iterator`.
    Our test iterators may currently support that, but it seems nonsensical to me.

Differential Revision: https://reviews.llvm.org/D95983
diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm
index 04126a1..4c13929 100644
--- a/libcxx/include/algorithm
+++ b/libcxx/include/algorithm
@@ -1639,12 +1639,22 @@
                            __value_, __equal_to<__v, _Tp>());
 }
 
-// __unwrap_iter
+// __unwrap_iter, __rewrap_iter
 
-// The job of __unwrap_iter is to lower iterators-that-are-tantamount-to-pointers
-// (such as vector<T>::iterator) into pointers, to reduce the number of template
+// The job of __unwrap_iter is to lower contiguous iterators (such as
+// vector<T>::iterator) into pointers, to reduce the number of template
 // instantiations and to enable pointer-based optimizations e.g. in std::copy.
+// For iterators that are not contiguous, it must be a no-op.
 // In debug mode, we don't do this.
+//
+// __unwrap_iter is non-constexpr for user-defined iterators whose
+// `to_address` and/or `operator->` is non-constexpr. This is okay; but we
+// try to avoid doing __unwrap_iter in constant-evaluated contexts anyway.
+//
+// Some algorithms (e.g. std::copy, but not std::sort) need to convert an
+// "unwrapped" result back into a contiguous iterator. Since contiguous iterators
+// are random-access, we can do this portably using iterator arithmetic; this
+// is the job of __rewrap_iter.
 
 template <class _Iter, bool = __is_cpp17_contiguous_iterator<_Iter>::value>
 struct __unwrap_iter_impl {
@@ -1674,6 +1684,20 @@
     return _Impl::__apply(__i);
 }
 
+template<class _OrigIter>
+_OrigIter __rewrap_iter(_OrigIter, _OrigIter __result) // overload chosen when __result already has type _OrigIter; first argument is unused
+{
+    return __result; // nothing to convert: hand the result back unchanged
+}
+
+template<class _OrigIter, class _UnwrappedIter>
+_OrigIter __rewrap_iter(_OrigIter __first, _UnwrappedIter __result) // converts an unwrapped __result back into an _OrigIter
+{
+    // Precondition: __result is reachable from __first
+    // Precondition: _OrigIter is a contiguous iterator
+    return __first + (__result - _VSTD::__unwrap_iter(__first)); // offset __first by the distance from its unwrapped form to __result
+}
+
 // copy
 
 template <class _InputIterator, class _OutputIterator>
@@ -1716,11 +1740,12 @@
 copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
 {
     if (__libcpp_is_constant_evaluated()) {
-        return _VSTD::__copy_constexpr(
-            _VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _VSTD::__unwrap_iter(__result));
+        return _VSTD::__copy_constexpr(__first, __last, __result);
     } else {
-        return _VSTD::__copy(
-            _VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _VSTD::__unwrap_iter(__result));
+        return _VSTD::__rewrap_iter(__result,
+            _VSTD::__copy(_VSTD::__unwrap_iter(__first),
+                          _VSTD::__unwrap_iter(__last),
+                          _VSTD::__unwrap_iter(__result)));
     }
 }
 
@@ -1770,13 +1795,12 @@
               _BidirectionalIterator2 __result)
 {
     if (__libcpp_is_constant_evaluated()) {
-        return _VSTD::__copy_backward_constexpr(_VSTD::__unwrap_iter(__first),
-                                                _VSTD::__unwrap_iter(__last),
-                                                _VSTD::__unwrap_iter(__result));
+        return _VSTD::__copy_backward_constexpr(__first, __last, __result);
     } else {
-        return _VSTD::__copy_backward(_VSTD::__unwrap_iter(__first),
-                                      _VSTD::__unwrap_iter(__last),
-                                      _VSTD::__unwrap_iter(__result));
+        return _VSTD::__rewrap_iter(__result,
+            _VSTD::__copy_backward(_VSTD::__unwrap_iter(__first),
+                                   _VSTD::__unwrap_iter(__last),
+                                   _VSTD::__unwrap_iter(__result)));
     }
 }
 
@@ -1843,8 +1867,6 @@
 
 // move
 
-// __move_constexpr exists so that __move doesn't call itself when delegating to the constexpr
-// version of __move.
 template <class _InputIterator, class _OutputIterator>
 inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 _OutputIterator
@@ -1873,8 +1895,6 @@
 >::type
 __move(_Tp* __first, _Tp* __last, _Up* __result)
 {
-    if (__libcpp_is_constant_evaluated())
-        return _VSTD::__move_constexpr(__first, __last, __result);
     const size_t __n = static_cast<size_t>(__last - __first);
     if (__n > 0)
         _VSTD::memmove(__result, __first, __n * sizeof(_Up));
@@ -1886,13 +1906,18 @@
 _OutputIterator
 move(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
 {
-    return _VSTD::__move(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _VSTD::__unwrap_iter(__result));
+    if (__libcpp_is_constant_evaluated()) {
+        return _VSTD::__move_constexpr(__first, __last, __result);
+    } else {
+        return _VSTD::__rewrap_iter(__result,
+            _VSTD::__move(_VSTD::__unwrap_iter(__first),
+                          _VSTD::__unwrap_iter(__last),
+                          _VSTD::__unwrap_iter(__result)));
+    }
 }
 
 // move_backward
 
-// __move_backward_constexpr exists so that __move_backward doesn't call itself when delegating to
-// the constexpr version of __move_backward.
 template <class _InputIterator, class _OutputIterator>
 inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 _OutputIterator
@@ -1921,8 +1946,6 @@
 >::type
 __move_backward(_Tp* __first, _Tp* __last, _Up* __result)
 {
-    if (__libcpp_is_constant_evaluated())
-        return _VSTD::__move_backward_constexpr(__first, __last, __result);
     const size_t __n = static_cast<size_t>(__last - __first);
     if (__n > 0)
     {
@@ -1938,7 +1961,14 @@
 move_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last,
               _BidirectionalIterator2 __result)
 {
-    return _VSTD::__move_backward(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _VSTD::__unwrap_iter(__result));
+    if (__libcpp_is_constant_evaluated()) {
+        return _VSTD::__move_backward_constexpr(__first, __last, __result);
+    } else {
+        return _VSTD::__rewrap_iter(__result,
+            _VSTD::__move_backward(_VSTD::__unwrap_iter(__first),
+                                   _VSTD::__unwrap_iter(__last),
+                                   _VSTD::__unwrap_iter(__result)));
+    }
 }
 
 // iter_swap