[email protected] | 40cb8e0c | 2012-04-02 10:35:44 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
[email protected] | 9fe1a5b | 2013-02-07 19:18:03 | [diff] [blame] | 5 | #include "base/strings/sys_string_conversions.h" |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 6 | |
avi | 84f37e1 | 2015-12-25 09:31:42 | [diff] [blame] | 7 | #include <stddef.h> |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 8 | #include <wchar.h> |
| 9 | |
[email protected] | eb62f726 | 2013-03-30 14:29:00 | [diff] [blame] | 10 | #include "base/strings/string_piece.h" |
[email protected] | a4ea1f1 | 2013-06-07 18:37:07 | [diff] [blame] | 11 | #include "base/strings/utf_string_conversions.h" |
avi | 84f37e1 | 2015-12-25 09:31:42 | [diff] [blame] | 12 | #include "build/build_config.h" |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 13 | |
| 14 | namespace base { |
| 15 | |
| 16 | std::string SysWideToUTF8(const std::wstring& wide) { |
| 17 | // In theory this should be using the system-provided conversion rather |
| 18 | // than our ICU, but this will do for now. |
| 19 | return WideToUTF8(wide); |
| 20 | } |
Reilly Grant | 39aecc3 | 2018-01-04 00:52:52 | [diff] [blame] | 21 | std::wstring SysUTF8ToWide(StringPiece utf8) { |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 22 | // In theory this should be using the system-provided conversion rather |
| 23 | // than our ICU, but this will do for now. |
| 24 | std::wstring out; |
| 25 | UTF8ToWide(utf8.data(), utf8.size(), &out); |
| 26 | return out; |
| 27 | } |
| 28 | |
halliwell | bd52a8a1 | 2014-12-12 17:21:04 | [diff] [blame] | 29 | #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID) |
[email protected] | be16cf2 | 2011-06-27 19:13:10 | [diff] [blame] | 30 | // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb() |
| 31 | // support and a better understanding of what calls these routines. |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 32 | |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 33 | std::string SysWideToNativeMB(const std::wstring& wide) { |
| 34 | return WideToUTF8(wide); |
| 35 | } |
| 36 | |
Reilly Grant | 39aecc3 | 2018-01-04 00:52:52 | [diff] [blame] | 37 | std::wstring SysNativeMBToWide(StringPiece native_mb) { |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 38 | return SysUTF8ToWide(native_mb); |
| 39 | } |
| 40 | |
| 41 | #else |
| 42 | |
// Converts |wide| to the multi-byte encoding used by wcrtomb() (i.e. the one
// selected by the current locale).
//
// Embedded NUL wide characters are preserved as single zero bytes. Returns an
// empty string if |wide| is empty or if wcrtomb() reports that any character
// cannot be converted.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A value-initialized mbstate_t describes the initial conversion state, so
  // no explicit memset() is needed.
  mbstate_t ps = {};

  std::string out;
  // Every input character yields at least one output byte, so this lower
  // bound avoids most reallocations without a separate counting pass.
  out.reserve(wide.size());

  for (const wchar_t src : wide) {
    // wcrtomb() writes at most MB_CUR_MAX bytes per character; convert into a
    // small scratch buffer and append only the bytes actually produced.
    char buf[16];
    // Skip NULLs to avoid wcrtomb's special handling of them.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;

    // Handle any errors and return an empty string.
    if (res == static_cast<size_t>(-1))
      return std::string();

    if (res == 0) {
      // We hit an embedded null, keep going.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
| 102 | |
Reilly Grant | 39aecc3 | 2018-01-04 00:52:52 | [diff] [blame] | 103 | std::wstring SysNativeMBToWide(StringPiece native_mb) { |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 104 | mbstate_t ps; |
| 105 | |
| 106 | // Calculate the number of wide characters. We walk through the string |
| 107 | // without writing the output, counting the number of wide characters. |
| 108 | size_t num_out_chars = 0; |
| 109 | memset(&ps, 0, sizeof(ps)); |
| 110 | for (size_t i = 0; i < native_mb.size(); ) { |
| 111 | const char* src = native_mb.data() + i; |
Ivan Kotenkov | a16212a5 | 2017-11-08 12:37:33 | [diff] [blame] | 112 | size_t res = mbrtowc(nullptr, src, native_mb.size() - i, &ps); |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 113 | switch (res) { |
| 114 | // Handle any errors and return an empty string. |
[email protected] | f884f2a | 2010-02-02 01:24:49 | [diff] [blame] | 115 | case static_cast<size_t>(-2): |
| 116 | case static_cast<size_t>(-1): |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 117 | return std::wstring(); |
| 118 | break; |
| 119 | case 0: |
| 120 | // We hit an embedded null byte, keep going. |
Nico Weber | 0ae8836 | 2018-01-25 19:26:02 | [diff] [blame] | 121 | i += 1; |
| 122 | FALLTHROUGH; |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 123 | default: |
| 124 | i += res; |
| 125 | ++num_out_chars; |
| 126 | break; |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | if (num_out_chars == 0) |
| 131 | return std::wstring(); |
| 132 | |
| 133 | std::wstring out; |
| 134 | out.resize(num_out_chars); |
| 135 | |
| 136 | memset(&ps, 0, sizeof(ps)); // Clear the shift state. |
| 137 | // We walk the input string again, with |i| tracking the index of the |
| 138 | // multi-byte input, and |j| tracking the wide output. |
| 139 | for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { |
| 140 | const char* src = native_mb.data() + i; |
| 141 | wchar_t* dst = &out[j]; |
| 142 | size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); |
| 143 | switch (res) { |
| 144 | // Handle any errors and return an empty string. |
[email protected] | f884f2a | 2010-02-02 01:24:49 | [diff] [blame] | 145 | case static_cast<size_t>(-2): |
| 146 | case static_cast<size_t>(-1): |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 147 | return std::wstring(); |
| 148 | break; |
| 149 | case 0: |
| 150 | i += 1; // Skip null byte. |
| 151 | break; |
| 152 | default: |
| 153 | i += res; |
| 154 | break; |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | return out; |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 159 | } |
| 160 | |
Scott Graham | c871209 | 2017-06-20 23:49:44 | [diff] [blame] | 161 | #endif // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID) |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 162 | |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 163 | } // namespace base |