[email protected] | 40cb8e0c | 2012-04-02 10:35:44 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "base/sys_string_conversions.h" |
| 6 | |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 7 | #include <wchar.h> |
| 8 | |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 9 | #include "base/string_piece.h" |
[email protected] | 379e7a5 | 2010-03-09 00:38:41 | [diff] [blame] | 10 | #include "base/utf_string_conversions.h" |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 11 | |
| 12 | namespace base { |
| 13 | |
| 14 | std::string SysWideToUTF8(const std::wstring& wide) { |
| 15 | // In theory this should be using the system-provided conversion rather |
| 16 | // than our ICU, but this will do for now. |
| 17 | return WideToUTF8(wide); |
| 18 | } |
| 19 | std::wstring SysUTF8ToWide(const StringPiece& utf8) { |
| 20 | // In theory this should be using the system-provided conversion rather |
| 21 | // than our ICU, but this will do for now. |
| 22 | std::wstring out; |
| 23 | UTF8ToWide(utf8.data(), utf8.size(), &out); |
| 24 | return out; |
| 25 | } |
| 26 | |
[email protected] | be16cf2 | 2011-06-27 19:13:10 | [diff] [blame] | 27 | #if defined(OS_CHROMEOS) || defined(OS_ANDROID) |
| 28 | // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb() |
| 29 | // support and a better understanding of what calls these routines. |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 30 | |
| 31 | // ChromeOS always runs in UTF-8 locale. |
| 32 | std::string SysWideToNativeMB(const std::wstring& wide) { |
| 33 | return WideToUTF8(wide); |
| 34 | } |
| 35 | |
| 36 | std::wstring SysNativeMBToWide(const StringPiece& native_mb) { |
| 37 | return SysUTF8ToWide(native_mb); |
| 38 | } |
| 39 | |
| 40 | #else |
| 41 | |
// Converts |wide| to a multi-byte string with wcrtomb(), one wide character
// at a time. Embedded NUL wide characters are carried over as single NUL
// bytes. If wcrtomb() reports an error for any character, an empty string is
// returned instead of a partial result.
//
// The conversion runs in two passes: the first measures how many bytes are
// needed, the second writes them into an exactly sized buffer.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  mbstate_t state;

  // Pass 1: measure the output. wcrtomb() does not compute the length when
  // given a NULL destination, so every character is converted into a scratch
  // buffer purely to learn its encoded size.
  size_t bytes_needed = 0;
  memset(&state, 0, sizeof(state));
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    const wchar_t wc = *it;
    char scratch[16];
    // NUL is never passed to wcrtomb() because it handles it specially; it is
    // accounted for as a zero-length result instead.
    const size_t produced = wc ? wcrtomb(scratch, wc, &state) : 0;
    if (produced == kConversionError)
      return std::string();
    // A zero-length result (embedded NUL) still takes up one output byte.
    bytes_needed += produced ? produced : 1;
  }

  if (!bytes_needed)
    return std::string();

  // Zero-filled, so the slots reserved for embedded NULs need no writing.
  std::string native_mb(bytes_needed, '\0');

  // Pass 2: convert again from a fresh shift state, this time writing each
  // character straight into its final position.
  memset(&state, 0, sizeof(state));
  size_t write_pos = 0;
  for (std::wstring::const_iterator it = wide.begin(); it != wide.end();
       ++it) {
    const wchar_t wc = *it;
    // As above, keep NUL away from wcrtomb().
    const size_t produced =
        wc ? wcrtomb(&native_mb[write_pos], wc, &state) : 0;
    if (produced == kConversionError)
      return std::string();
    // For an embedded NUL just step over the already-zeroed byte.
    write_pos += produced ? produced : 1;
  }

  return native_mb;
}
| 101 | |
| 102 | std::wstring SysNativeMBToWide(const StringPiece& native_mb) { |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 103 | mbstate_t ps; |
| 104 | |
| 105 | // Calculate the number of wide characters. We walk through the string |
| 106 | // without writing the output, counting the number of wide characters. |
| 107 | size_t num_out_chars = 0; |
| 108 | memset(&ps, 0, sizeof(ps)); |
| 109 | for (size_t i = 0; i < native_mb.size(); ) { |
| 110 | const char* src = native_mb.data() + i; |
| 111 | size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); |
| 112 | switch (res) { |
| 113 | // Handle any errors and return an empty string. |
[email protected] | f884f2a | 2010-02-02 01:24:49 | [diff] [blame] | 114 | case static_cast<size_t>(-2): |
| 115 | case static_cast<size_t>(-1): |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 116 | return std::wstring(); |
| 117 | break; |
| 118 | case 0: |
| 119 | // We hit an embedded null byte, keep going. |
| 120 | i += 1; // Fall through. |
| 121 | default: |
| 122 | i += res; |
| 123 | ++num_out_chars; |
| 124 | break; |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | if (num_out_chars == 0) |
| 129 | return std::wstring(); |
| 130 | |
| 131 | std::wstring out; |
| 132 | out.resize(num_out_chars); |
| 133 | |
| 134 | memset(&ps, 0, sizeof(ps)); // Clear the shift state. |
| 135 | // We walk the input string again, with |i| tracking the index of the |
| 136 | // multi-byte input, and |j| tracking the wide output. |
| 137 | for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { |
| 138 | const char* src = native_mb.data() + i; |
| 139 | wchar_t* dst = &out[j]; |
| 140 | size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); |
| 141 | switch (res) { |
| 142 | // Handle any errors and return an empty string. |
[email protected] | f884f2a | 2010-02-02 01:24:49 | [diff] [blame] | 143 | case static_cast<size_t>(-2): |
| 144 | case static_cast<size_t>(-1): |
[email protected] | 5ae918a | 2009-06-26 09:53:41 | [diff] [blame] | 145 | return std::wstring(); |
| 146 | break; |
| 147 | case 0: |
| 148 | i += 1; // Skip null byte. |
| 149 | break; |
| 150 | default: |
| 151 | i += res; |
| 152 | break; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | return out; |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 157 | } |
| 158 | |
[email protected] | f05c495 | 2010-06-07 23:23:07 | [diff] [blame] | 159 | #endif // OS_CHROMEOS |
| 160 | |
[email protected] | b2e9729 | 2008-09-02 18:20:34 | [diff] [blame] | 161 | } // namespace base |