// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
| 5 | #include "base/logging.h" |
| 6 | #include "base/string_util.h" |
| 7 | #include "googleurl/src/url_canon.h" |
| 8 | |
| 9 | #include <windows.h> |
| 10 | |
| 11 | //////////////////////////////////////////////////////////////////////////////// |
| 12 | // Avoid dependency on string_util_icu.cc (which pulls in icu). |
| 13 | |
| 14 | std::string WideToAnsiDirect(const wchar_t* wide, size_t wide_len) { |
| 15 | std::string ret; |
| 16 | char* write = WriteInto(&ret, wide_len + 1); |
| 17 | for (size_t i = 0; i < wide_len; ++i) { |
| 18 | // We can only convert characters below 0x80 directly from wide to ansi. |
| 19 | DCHECK(wide[i] <= 127) << "can't convert"; |
| 20 | write[i] = static_cast<char>(wide[i]); |
| 21 | } |
| 22 | |
| 23 | write[wide_len] = '\0'; |
| 24 | |
| 25 | return ret; |
| 26 | } |
| 27 | |
| 28 | bool WideToUTF8(const wchar_t* wide, size_t wide_len, std::string* utf8) { |
| 29 | DCHECK(utf8); |
| 30 | |
| 31 | // Add a cutoff. If it's all ASCII, convert it directly |
| 32 | size_t i; |
| 33 | for (i = 0; i < wide_len; ++i) { |
| 34 | if (wide[i] > 127) |
| 35 | break; |
| 36 | } |
| 37 | |
| 38 | // If we made it to the end without breaking, then it's all ANSI, so do a |
| 39 | // quick convert |
| 40 | if (i == wide_len) { |
| 41 | *utf8 = WideToAnsiDirect(wide, wide_len); |
| 42 | return true; |
| 43 | } |
| 44 | |
| 45 | // Figure out how long the string is |
| 46 | int size = WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, NULL, 0, NULL, |
| 47 | NULL); |
| 48 | |
| 49 | if (size > 0) { |
| 50 | WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, WriteInto(utf8, size), |
| 51 | size, NULL, NULL); |
| 52 | } |
| 53 | |
| 54 | return (size > 0); |
| 55 | } |
| 56 | |
| 57 | std::string WideToUTF8(const std::wstring& wide) { |
| 58 | std::string ret; |
| 59 | if (!wide.empty()) { |
| 60 | // Ignore the success flag of this call, it will do the best it can for |
| 61 | // invalid input, which is what we want here. |
| 62 | WideToUTF8(wide.data(), wide.length(), &ret); |
| 63 | } |
| 64 | return ret; |
| 65 | } |
| 66 | |
| 67 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { |
| 68 | DCHECK(output); |
| 69 | |
| 70 | if (src_len == 0) { |
| 71 | output->clear(); |
| 72 | return true; |
| 73 | } |
| 74 | |
| 75 | int wide_chars = MultiByteToWideChar(CP_UTF8, 0, src, src_len, NULL, 0); |
| 76 | if (!wide_chars) { |
| 77 | NOTREACHED(); |
| 78 | return false; |
| 79 | } |
| 80 | |
| 81 | wide_chars++; // make room for L'\0' |
| 82 | // Note that WriteInto will fill the string with '\0', so in the case |
| 83 | // where the input string is not \0 terminated, we will still be ensured |
| 84 | // that the output string will be. |
| 85 | if (!MultiByteToWideChar(CP_UTF8, 0, src, src_len, |
| 86 | WriteInto(output, wide_chars), wide_chars)) { |
| 87 | NOTREACHED(); |
| 88 | output->clear(); |
| 89 | return false; |
| 90 | } |
| 91 | |
| 92 | return true; |
| 93 | } |
| 94 | |
| 95 | std::wstring UTF8ToWide(const base::StringPiece& utf8) { |
| 96 | std::wstring ret; |
| 97 | if (!utf8.empty()) |
| 98 | UTF8ToWide(utf8.data(), utf8.length(), &ret); |
| 99 | return ret; |
| 100 | } |
| 101 | |
| 102 | #ifdef WCHAR_T_IS_UTF16 |
| 103 | string16 UTF8ToUTF16(const std::string& utf8) { |
| 104 | std::wstring ret; |
| 105 | if (!utf8.empty()) |
| 106 | UTF8ToWide(utf8.data(), utf8.length(), &ret); |
| 107 | return ret; |
| 108 | } |
| 109 | #else |
| 110 | #error Need WCHAR_T_IS_UTF16 |
| 111 | #endif |
| 112 | |
| 113 | //////////////////////////////////////////////////////////////////////////////// |
| 114 | // Replace ICU dependent functions in googleurl. |
| 115 | /*#define __UTF_H__ |
| 116 | #include "third_party/icu38/public/common/unicode/utf16.h" |
| 117 | #define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) |
| 118 | extern const char16 kUnicodeReplacementCharacter;*/ |
| 119 | |
| 120 | namespace url_canon { |
| 121 | |
| 122 | bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) { |
| 123 | // We should only hit this when the user attempts to navigate |
| 124 | // CF to an invalid URL. |
| 125 | DLOG(WARNING) << __FUNCTION__ << " not implemented"; |
| 126 | return false; |
| 127 | } |
| 128 | |
| 129 | bool ReadUTFChar(const char* str, int* begin, int length, |
| 130 | unsigned* code_point_out) { |
| 131 | // We should only hit this when the user attempts to navigate |
| 132 | // CF to an invalid URL. |
| 133 | DLOG(WARNING) << __FUNCTION__ << " not implemented"; |
| 134 | |
| 135 | // TODO(tommi): consider if we can use something like |
| 136 | // http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
| 137 | return false; |
| 138 | } |
| 139 | |
| 140 | bool ReadUTFChar(const char16* str, int* begin, int length, |
| 141 | unsigned* code_point) { |
| 142 | /* |
| 143 | if (U16_IS_SURROGATE(str[*begin])) { |
| 144 | if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length || |
| 145 | !U16_IS_TRAIL(str[*begin + 1])) { |
| 146 | // Invalid surrogate pair. |
| 147 | *code_point = kUnicodeReplacementCharacter; |
| 148 | return false; |
| 149 | } else { |
| 150 | // Valid surrogate pair. |
| 151 | *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]); |
| 152 | (*begin)++; |
| 153 | } |
| 154 | } else { |
| 155 | // Not a surrogate, just one 16-bit word. |
| 156 | *code_point = str[*begin]; |
| 157 | } |
| 158 | |
| 159 | if (U_IS_UNICODE_CHAR(*code_point)) |
| 160 | return true; |
| 161 | |
| 162 | // Invalid code point. |
| 163 | *code_point = kUnicodeReplacementCharacter; |
| 164 | return false;*/ |
| 165 | CHECK(false); |
| 166 | return false; |
| 167 | } |
| 168 | |
| 169 | } // namespace url_canon |