[email protected] | 9486104 | 2012-08-04 02:28:36 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
[email protected] | 15af80e | 2008-08-07 03:11:42 | [diff] [blame] | 4 | |
[email protected] | 9fe1a5b | 2013-02-07 19:18:03 | [diff] [blame] | 5 | #include "base/strings/sys_string_conversions.h" |
[email protected] | 4bdaceb4 | 2008-08-19 13:19:24 | [diff] [blame] | 6 | |
[email protected] | 03d95ac | 2008-10-08 21:02:56 | [diff] [blame] | 7 | #import <Foundation/Foundation.h> |
avi | 84f37e1 | 2015-12-25 09:31:42 | [diff] [blame] | 8 | #include <stddef.h> |
[email protected] | 03d95ac | 2008-10-08 21:02:56 | [diff] [blame] | 9 | |
[email protected] | 47944fd | 2008-08-07 19:31:16 | [diff] [blame] | 10 | #include <vector> |
[email protected] | 4bdaceb4 | 2008-08-19 13:19:24 | [diff] [blame] | 11 | |
[email protected] | 1671162f | 2011-04-29 15:06:32 | [diff] [blame] | 12 | #include "base/mac/foundation_util.h" |
[email protected] | df0ca6c8 | 2010-10-17 04:09:06 | [diff] [blame] | 13 | #include "base/mac/scoped_cftyperef.h" |
Peter Kasting | b7445486 | 2022-07-13 00:40:12 | [diff] [blame] | 14 | #include "base/numerics/safe_conversions.h" |
[email protected] | eb62f726 | 2013-03-30 14:29:00 | [diff] [blame] | 15 | #include "base/strings/string_piece.h" |
[email protected] | 15af80e | 2008-08-07 03:11:42 | [diff] [blame] | 16 | |
| 17 | namespace base { |
| 18 | |
| 19 | namespace { |
| 20 | |
// Converts the supplied CFString into the specified encoding, and returns it as
// a C++ library string of the template type. Returns an empty string if
// `cfstring` is empty or if CFStringGetBytes() reports that nothing could be
// converted.
//
// `encoding` is expected to produce code units whose size matches
// `StringType::value_type`; the callers in this file pair UTF-8 with
// std::string, UTF-16LE with std::u16string and UTF-32LE with std::wstring.
//
// NOTE(review): `cfstring` is handed to CFStringGetLength() unchecked, so it is
// presumably required to be non-null -- confirm against callers.
//
// Do not assert in this function since it is used by the assertion code!
template <typename StringType>
StringType CFStringToStringWithEncodingT(CFStringRef cfstring,
                                         CFStringEncoding encoding) {
  using CharType = typename StringType::value_type;

  const CFIndex length = CFStringGetLength(cfstring);
  if (length == 0)
    return StringType();

  // First pass: with a null buffer nothing is written; the call only reports
  // through `out_size` how many bytes the converted text occupies.
  const CFRange whole_string = CFRangeMake(0, length);
  CFIndex out_size = 0;
  CFIndex converted = CFStringGetBytes(cfstring, whole_string, encoding,
                                       /*lossByte=*/0,
                                       /*isExternalRepresentation=*/false,
                                       /*buffer=*/nullptr,
                                       /*maxBufLen=*/0, &out_size);
  if (converted == 0 || out_size <= 0)
    return StringType();

  // `out_size` is the number of UInt8-sized units needed in the destination.
  // Convert it to a count of whole `CharType` elements.
  const size_t char_count = static_cast<size_t>(out_size) / sizeof(CharType);

  // Second pass: convert straight into the result's own storage, which is
  // correctly aligned for `CharType`, instead of going through a scratch
  // buffer and copying. One spare element guarantees that the storage spans at
  // least `out_size` bytes even if `out_size` is not an exact multiple of
  // sizeof(CharType); it is trimmed off again below.
  StringType result(char_count + 1, CharType());
  converted =
      CFStringGetBytes(cfstring, whole_string, encoding,
                       /*lossByte=*/0,
                       /*isExternalRepresentation=*/false,
                       reinterpret_cast<UInt8*>(&result[0]), out_size,
                       /*usedBufLen=*/nullptr);
  if (converted == 0)
    return StringType();

  result.resize(char_count);
  return result;
}
| 65 | |
// Re-encodes the C++ library string `in`, whose contents are in `in_encoding`,
// into `out_encoding`, and hands the result back as an `OutStringType`. An
// empty input, a failure to wrap the input in a CFString, or a failed
// conversion all yield an empty string.
//
// Do not assert in this function since it is used by the assertion code!
template <typename InStringType, typename OutStringType>
OutStringType StringToStringWithEncodingsT(const InStringType& in,
                                           CFStringEncoding in_encoding,
                                           CFStringEncoding out_encoding) {
  using InCharType = typename InStringType::value_type;

  if (in.length() == 0)
    return OutStringType();

  // View the input as raw bytes; the byte count is the element count scaled by
  // the width of one input code unit.
  const UInt8* const raw_bytes = reinterpret_cast<const UInt8*>(in.data());
  const CFIndex byte_count =
      checked_cast<CFIndex>(in.length() * sizeof(InCharType));

  // Wrap the caller's storage without copying it. kCFAllocatorNull is passed
  // as the contents deallocator so that `in`'s buffer is left alone when the
  // temporary CFString goes away.
  base::ScopedCFTypeRef<CFStringRef> wrapped_input(
      CFStringCreateWithBytesNoCopy(kCFAllocatorDefault, raw_bytes, byte_count,
                                    in_encoding,
                                    /*isExternalRepresentation=*/false,
                                    kCFAllocatorNull));
  if (!wrapped_input)
    return OutStringType();

  return CFStringToStringWithEncodingT<OutStringType>(wrapped_input,
                                                      out_encoding);
}
| 90 | |
// Builds a CFString from the StringPiece `in`, interpreting its bytes as
// `in_encoding`. An empty piece maps to a retained reference to the constant
// empty CFString; otherwise the result wraps whatever CFStringCreateWithBytes()
// returns, which is null on failure.
template <typename CharT>
ScopedCFTypeRef<CFStringRef> StringPieceToCFStringWithEncodingsT(
    BasicStringPiece<CharT> in,
    CFStringEncoding in_encoding) {
  if (in.empty())
    return ScopedCFTypeRef<CFStringRef>(CFSTR(""), base::scoped_policy::RETAIN);

  // CFStringCreateWithBytes() takes a byte pointer and a byte count, so scale
  // the element count by the width of one code unit.
  const UInt8* const raw_bytes = reinterpret_cast<const UInt8*>(in.data());
  const CFIndex byte_count = checked_cast<CFIndex>(in.length() * sizeof(CharT));

  return ScopedCFTypeRef<CFStringRef>(
      CFStringCreateWithBytes(kCFAllocatorDefault, raw_bytes, byte_count,
                              in_encoding,
                              /*isExternalRepresentation=*/false));
}
| 105 | |
[email protected] | 15af80e | 2008-08-07 03:11:42 | [diff] [blame] | 106 | } // namespace |
| 107 | |
Avi Drissman | 2b8673a | 2022-01-04 20:38:49 | [diff] [blame] | 108 | // The CFStringEncodings used below specify the byte ordering explicitly, |
| 109 | // otherwise CFString will be confused when strings don't carry BOMs, as they |
| 110 | // typically won't. |
| 111 | |
// Converts the wide string `wide`, treated as little-endian UTF-32, to UTF-8.
// Returns an empty string if the conversion fails.
//
// Do not assert in this function since it is used by the assertion code!
std::string SysWideToUTF8(const std::wstring& wide) {
  const CFStringEncoding source_encoding = kCFStringEncodingUTF32LE;
  const CFStringEncoding target_encoding = kCFStringEncodingUTF8;
  return StringToStringWithEncodingsT<std::wstring, std::string>(
      wide, source_encoding, target_encoding);
}
| 117 | |
// Converts the UTF-8 text in `utf8` to a wide string encoded as little-endian
// UTF-32. Returns an empty string if the conversion fails.
//
// Do not assert in this function since it is used by the assertion code!
std::wstring SysUTF8ToWide(StringPiece utf8) {
  const CFStringEncoding source_encoding = kCFStringEncodingUTF8;
  const CFStringEncoding target_encoding = kCFStringEncodingUTF32LE;
  return StringToStringWithEncodingsT<StringPiece, std::wstring>(
      utf8, source_encoding, target_encoding);
}
| 123 | |
// Converts `wide` to the native multi-byte encoding. In this implementation
// that is simply UTF-8, so the work is delegated to SysWideToUTF8().
std::string SysWideToNativeMB(const std::wstring& wide) {
  std::string native_mb = SysWideToUTF8(wide);
  return native_mb;
}
| 127 | |
// Converts `native_mb` from the native multi-byte encoding to a wide string.
// In this implementation the native encoding is treated as UTF-8, so the work
// is delegated to SysUTF8ToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  std::wstring wide = SysUTF8ToWide(native_mb);
  return wide;
}
| 131 | |
// Creates a CFString from the UTF-8 text in `utf8`. The returned scoper holds
// null if the CFString could not be created.
ScopedCFTypeRef<CFStringRef> SysUTF8ToCFStringRef(StringPiece utf8) {
  const CFStringEncoding source_encoding = kCFStringEncodingUTF8;
  return StringPieceToCFStringWithEncodingsT(utf8, source_encoding);
}
| 135 | |
// Creates a CFString from the text in `utf16`, interpreted as little-endian
// UTF-16. The returned scoper holds null if the CFString could not be created.
ScopedCFTypeRef<CFStringRef> SysUTF16ToCFStringRef(StringPiece16 utf16) {
  const CFStringEncoding source_encoding = kCFStringEncodingUTF16LE;
  return StringPieceToCFStringWithEncodingsT(utf16, source_encoding);
}
| 139 | |
// Creates an autoreleased NSString from the UTF-8 text in `utf8`.
NSString* SysUTF8ToNSString(StringPiece utf8) {
  ScopedCFTypeRef<CFStringRef> cf_string = SysUTF8ToCFStringRef(utf8);
  // Take the reference out of the scoper and hand it to the autorelease pool
  // instead, so the string outlives this function.
  NSString* ns_string = mac::CFToNSCast(cf_string.release());
  return [ns_string autorelease];
}
| 143 | |
// Creates an autoreleased NSString from the text in `utf16`, interpreted as
// little-endian UTF-16.
NSString* SysUTF16ToNSString(StringPiece16 utf16) {
  ScopedCFTypeRef<CFStringRef> cf_string = SysUTF16ToCFStringRef(utf16);
  // Take the reference out of the scoper and hand it to the autorelease pool
  // instead, so the string outlives this function.
  NSString* ns_string = mac::CFToNSCast(cf_string.release());
  return [ns_string autorelease];
}
| 147 | |
// Returns the contents of the CFString `ref` as UTF-8, or an empty string if
// the conversion fails.
// NOTE(review): `ref` is forwarded without a null check -- presumably callers
// must pass a valid CFString; confirm.
std::string SysCFStringRefToUTF8(CFStringRef ref) {
  const CFStringEncoding target_encoding = kCFStringEncodingUTF8;
  return CFStringToStringWithEncodingT<std::string>(ref, target_encoding);
}
| 151 | |
// Returns the contents of the CFString `ref` as little-endian UTF-16, or an
// empty string if the conversion fails.
// NOTE(review): `ref` is forwarded without a null check -- presumably callers
// must pass a valid CFString; confirm.
std::u16string SysCFStringRefToUTF16(CFStringRef ref) {
  const CFStringEncoding target_encoding = kCFStringEncodingUTF16LE;
  return CFStringToStringWithEncodingT<std::u16string>(ref, target_encoding);
}
| 156 | |
// Returns the contents of `nsstring` as UTF-8. A nil `nsstring` yields an
// empty string.
std::string SysNSStringToUTF8(NSString* nsstring) {
  return nsstring ? SysCFStringRefToUTF8(mac::NSToCFCast(nsstring))
                  : std::string();
}
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 162 | |
// Returns the contents of `nsstring` as UTF-16. A nil `nsstring` yields an
// empty string.
std::u16string SysNSStringToUTF16(NSString* nsstring) {
  return nsstring ? SysCFStringRefToUTF16(mac::NSToCFCast(nsstring))
                  : std::u16string();
}
| 168 | |
[email protected] | 03d95ac | 2008-10-08 21:02:56 | [diff] [blame] | 169 | } // namespace base |