// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/logging.h"
6#include "base/string_util.h"
7#include "googleurl/src/url_canon.h"
8
9#include <windows.h>
10
11////////////////////////////////////////////////////////////////////////////////
12// Avoid dependency on string_util_icu.cc (which pulls in icu).
13
14std::string WideToAnsiDirect(const wchar_t* wide, size_t wide_len) {
15 std::string ret;
16 char* write = WriteInto(&ret, wide_len + 1);
17 for (size_t i = 0; i < wide_len; ++i) {
18 // We can only convert characters below 0x80 directly from wide to ansi.
19 DCHECK(wide[i] <= 127) << "can't convert";
20 write[i] = static_cast<char>(wide[i]);
21 }
22
23 write[wide_len] = '\0';
24
25 return ret;
26}
27
28bool WideToUTF8(const wchar_t* wide, size_t wide_len, std::string* utf8) {
29 DCHECK(utf8);
30
31 // Add a cutoff. If it's all ASCII, convert it directly
32 size_t i;
33 for (i = 0; i < wide_len; ++i) {
34 if (wide[i] > 127)
35 break;
36 }
37
38 // If we made it to the end without breaking, then it's all ANSI, so do a
39 // quick convert
40 if (i == wide_len) {
41 *utf8 = WideToAnsiDirect(wide, wide_len);
42 return true;
43 }
44
45 // Figure out how long the string is
46 int size = WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, NULL, 0, NULL,
47 NULL);
48
49 if (size > 0) {
50 WideCharToMultiByte(CP_UTF8, 0, wide, wide_len + 1, WriteInto(utf8, size),
51 size, NULL, NULL);
52 }
53
54 return (size > 0);
55}
56
57std::string WideToUTF8(const std::wstring& wide) {
58 std::string ret;
59 if (!wide.empty()) {
60 // Ignore the success flag of this call, it will do the best it can for
61 // invalid input, which is what we want here.
62 WideToUTF8(wide.data(), wide.length(), &ret);
63 }
64 return ret;
65}
66
67bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
68 DCHECK(output);
69
70 if (src_len == 0) {
71 output->clear();
72 return true;
73 }
74
75 int wide_chars = MultiByteToWideChar(CP_UTF8, 0, src, src_len, NULL, 0);
76 if (!wide_chars) {
77 NOTREACHED();
78 return false;
79 }
80
81 wide_chars++; // make room for L'\0'
82 // Note that WriteInto will fill the string with '\0', so in the case
83 // where the input string is not \0 terminated, we will still be ensured
84 // that the output string will be.
85 if (!MultiByteToWideChar(CP_UTF8, 0, src, src_len,
86 WriteInto(output, wide_chars), wide_chars)) {
87 NOTREACHED();
88 output->clear();
89 return false;
90 }
91
92 return true;
93}
94
95std::wstring UTF8ToWide(const base::StringPiece& utf8) {
96 std::wstring ret;
97 if (!utf8.empty())
98 UTF8ToWide(utf8.data(), utf8.length(), &ret);
99 return ret;
100}
101
102#ifdef WCHAR_T_IS_UTF16
103string16 UTF8ToUTF16(const std::string& utf8) {
104 std::wstring ret;
105 if (!utf8.empty())
106 UTF8ToWide(utf8.data(), utf8.length(), &ret);
107 return ret;
108}
109#else
110#error Need WCHAR_T_IS_UTF16
111#endif
112
113////////////////////////////////////////////////////////////////////////////////
114// Replace ICU dependent functions in googleurl.
115/*#define __UTF_H__
116#include "third_party/icu38/public/common/unicode/utf16.h"
117#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
118extern const char16 kUnicodeReplacementCharacter;*/
119
120namespace url_canon {
121
122bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output) {
123 // We should only hit this when the user attempts to navigate
124 // CF to an invalid URL.
125 DLOG(WARNING) << __FUNCTION__ << " not implemented";
126 return false;
127}
128
129bool ReadUTFChar(const char* str, int* begin, int length,
130 unsigned* code_point_out) {
131 // We should only hit this when the user attempts to navigate
132 // CF to an invalid URL.
133 DLOG(WARNING) << __FUNCTION__ << " not implemented";
134
135 // TODO(tommi): consider if we can use something like
136 // http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
137 return false;
138}
139
140bool ReadUTFChar(const char16* str, int* begin, int length,
141 unsigned* code_point) {
142/*
143 if (U16_IS_SURROGATE(str[*begin])) {
144 if (!U16_IS_SURROGATE_LEAD(str[*begin]) || *begin + 1 >= length ||
145 !U16_IS_TRAIL(str[*begin + 1])) {
146 // Invalid surrogate pair.
147 *code_point = kUnicodeReplacementCharacter;
148 return false;
149 } else {
150 // Valid surrogate pair.
151 *code_point = U16_GET_SUPPLEMENTARY(str[*begin], str[*begin + 1]);
152 (*begin)++;
153 }
154 } else {
155 // Not a surrogate, just one 16-bit word.
156 *code_point = str[*begin];
157 }
158
159 if (U_IS_UNICODE_CHAR(*code_point))
160 return true;
161
162 // Invalid code point.
163 *code_point = kUnicodeReplacementCharacter;
164 return false;*/
165 CHECK(false);
166 return false;
167}
168
169} // namespace url_canon