Blame - base/string_util_icu.cc - chromium/src.git

blob: 2287ba5b8ad2a5650636fb88ec0953e85ac620b0 [file] [log] [blame]

license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame]	1	// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	4
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	5	#include "base/string_util.h"
				6
				7	#include <string.h>
				8	#include <vector>
				9
				10	#include "base/basictypes.h"
				11	#include "base/logging.h"
				12	#include "base/singleton.h"
				13	#include "unicode/ucnv.h"
				14	#include "unicode/numfmt.h"
				15	#include "unicode/ustring.h"
				16
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	17	namespace {
				18
				19	// ReadUnicodeCharacter --------------------------------------------------------
				20
				21	// Reads a UTF-8 stream, placing the next code point into the given output
				22	// \|code_point\|. \|src\| represents the entire string to read, and \|char_index\|
				23	// is the character offset within the string to start reading at. \|*char_index\|
				24	// will be updated to index the last character read, such that incrementing it
				25	// (as in a for loop) will take the reader to the next character.
				26	//
				27	// Returns true on success. On false, \|*code_point\| will be invalid.
				28	bool ReadUnicodeCharacter(const char* src, int32 src_len,
[email protected]	d6b0667	2008-08-19 00:31:24	[diff] [blame]	29	int32* char_index, uint32* code_point_out) {
				30	// U8_NEXT expects to be able to use -1 to signal an error, so we must
				31	// use a signed type for code_point. But this function returns false
				32	// on error anyway, so code_point_out is unsigned.
				33	int32 code_point;
				34	U8_NEXT(src, *char_index, src_len, code_point);
				35	*code_point_out = static_cast<uint32>(code_point);
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	36
				37	// The ICU macro above moves to the next char, we want to point to the last
				38	// char consumed.
				39	(*char_index)--;
				40
				41	// Validate the decoded value.
[email protected]	d6b0667	2008-08-19 00:31:24	[diff] [blame]	42	return U_IS_UNICODE_CHAR(code_point);
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	43	}
				44
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	45	// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	46	bool ReadUnicodeCharacter(const char16* src, int32 src_len,
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	47	int32* char_index, uint32* code_point) {
				48	if (U16_IS_SURROGATE(src[*char_index])) {
				49	if (!U16_IS_SURROGATE_LEAD(src[*char_index]) \|\|
				50	*char_index + 1 >= src_len \|\|
				51	!U16_IS_TRAIL(src[*char_index + 1])) {
				52	// Invalid surrogate pair.
				53	return false;
				54	}
				55
				56	// Valid surrogate pair.
				57	code_point = U16_GET_SUPPLEMENTARY(src[char_index],
				58	src[*char_index + 1]);
				59	(*char_index)++;
				60	} else {
				61	// Not a surrogate, just one 16-bit word.
				62	code_point = src[char_index];
				63	}
				64
				65	return U_IS_UNICODE_CHAR(*code_point);
				66	}
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	67
				68	#if defined(WCHAR_T_IS_UTF32)
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	69	// Reads UTF-32 character. The usage is the same as the 8-bit version above.
[email protected]	a31e79e	2008-08-07 22:36:01	[diff] [blame]	70	bool ReadUnicodeCharacter(const wchar_t* src, int32 src_len,
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	71	int32* char_index, uint32* code_point) {
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	72	// Conversion is easy since the source is 32-bit.
				73	code_point = src[char_index];
				74
				75	// Validate the value.
				76	return U_IS_UNICODE_CHAR(*code_point);
				77	}
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	78	#endif // defined(WCHAR_T_IS_UTF32)
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	79
				80	// WriteUnicodeCharacter -------------------------------------------------------
				81
				82	// Appends a UTF-8 character to the given 8-bit string.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	83	void WriteUnicodeCharacter(uint32 code_point, std::string* output) {
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	84	if (code_point <= 0x7f) {
				85	// Fast path the common case of one byte.
				86	output->push_back(code_point);
				87	return;
				88	}
				89
				90	// U8_APPEND_UNSAFE can append up to 4 bytes.
				91	int32 char_offset = static_cast<int32>(output->length());
				92	output->resize(char_offset + U8_MAX_LENGTH);
				93
				94	U8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
				95
				96	// U8_APPEND_UNSAFE will advance our pointer past the inserted character, so
				97	// it will represent the new length of the string.
				98	output->resize(char_offset);
				99	}
				100
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	101	// Appends the given code point as a UTF-16 character to the STL string.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	102	void WriteUnicodeCharacter(uint32 code_point, string16* output) {
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	103	if (U16_LENGTH(code_point) == 1) {
				104	// Thie code point is in the Basic Multilingual Plane (BMP).
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	105	output->push_back(static_cast<char16>(code_point));
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	106	} else {
				107	// Non-BMP characters use a double-character encoding.
				108	int32 char_offset = static_cast<int32>(output->length());
				109	output->resize(char_offset + U16_MAX_LENGTH);
				110	U16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
				111	}
				112	}
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	113
				114	#if defined(WCHAR_T_IS_UTF32)
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	115	// Appends the given UTF-32 character to the given 32-bit string.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	116	inline void WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	117	// This is the easy case, just append the character.
				118	output->push_back(code_point);
				119	}
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	120	#endif // defined(WCHAR_T_IS_UTF32)
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	121
				122	// Generalized Unicode converter -----------------------------------------------
				123
				124	// Converts the given source Unicode character type to the given destination
				125	// Unicode character type as a STL string. The given input buffer and size
				126	// determine the source, and the given output STL string will be replaced by
				127	// the result.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	128	template<typename SRC_CHAR, typename DEST_STRING>
				129	bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	130	output->clear();
				131
				132	// ICU requires 32-bit numbers.
				133	bool success = true;
				134	int32 src_len32 = static_cast<int32>(src_len);
				135	for (int32 i = 0; i < src_len32; i++) {
				136	uint32 code_point;
				137	if (ReadUnicodeCharacter(src, src_len32, &i, &code_point))
				138	WriteUnicodeCharacter(code_point, output);
				139	else
				140	success = false;
				141	}
				142	return success;
				143	}
				144
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	145
				146	// Guesses the length of the output in UTF-8 in bytes, and reserves that amount
				147	// of space in the given string. We also assume that the input character types
				148	// are unsigned, which will be true for UTF-16 and -32 on our systems. We assume
				149	// the string length is greater than zero.
				150	template<typename CHAR>
				151	void ReserveUTF8Output(const CHAR* src, size_t src_len, std::string* output) {
				152	if (src[0] < 0x80) {
				153	// Assume that the entire input will be ASCII.
				154	output->reserve(src_len);
				155	} else {
				156	// Assume that the entire input is non-ASCII and will have 3 bytes per char.
				157	output->reserve(src_len * 3);
				158	}
				159	}
				160
				161	// Guesses the size of the output buffer (containing either UTF-16 or -32 data)
				162	// given some UTF-8 input that will be converted to it. See ReserveUTF8Output.
				163	// We assume the source length is > 0.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	164	template<typename STRING>
				165	void ReserveUTF16Or32Output(const char* src, size_t src_len, STRING* output) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	166	if (static_cast<unsigned char>(src[0]) < 0x80) {
				167	// Assume the input is all ASCII, which means 1:1 correspondence.
				168	output->reserve(src_len);
				169	} else {
				170	// Otherwise assume that the UTF-8 sequences will have 2 bytes for each
				171	// character.
				172	output->reserve(src_len / 2);
				173	}
				174	}
				175
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	176	} // namespace
				177
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	178	// UTF-8 <-> Wide --------------------------------------------------------------
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	179
				180	std::string WideToUTF8(const std::wstring& wide) {
				181	std::string ret;
				182	if (wide.empty())
				183	return ret;
				184
				185	// Ignore the success flag of this call, it will do the best it can for
				186	// invalid input, which is what we want here.
				187	WideToUTF8(wide.data(), wide.length(), &ret);
				188	return ret;
				189	}
				190
				191	bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
				192	if (src_len == 0) {
				193	output->clear();
				194	return true;
				195	}
				196
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	197	ReserveUTF8Output(src, src_len, output);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	198	return ConvertUnicode<wchar_t, std::string>(src, src_len, output);
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	199	}
				200
				201	std::wstring UTF8ToWide(const std::string& utf8) {
				202	std::wstring ret;
				203	if (utf8.empty())
				204	return ret;
				205
				206	UTF8ToWide(utf8.data(), utf8.length(), &ret);
				207	return ret;
				208	}
				209
				210	bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
				211	if (src_len == 0) {
				212	output->clear();
				213	return true;
				214	}
				215
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	216	ReserveUTF16Or32Output(src, src_len, output);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	217	return ConvertUnicode<char, std::wstring>(src, src_len, output);
[email protected]	6b27db80	2008-08-07 15:29:49	[diff] [blame]	218	}
				219
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	220	// UTF-16 <-> Wide -------------------------------------------------------------
				221
				222	#if defined(WCHAR_T_IS_UTF16)
				223
				224	// When wide == UTF-16, then conversions are a NOP.
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	225	string16 WideToUTF16(const std::wstring& wide) {
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	226	return wide;
				227	}
				228
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	229	bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	230	output->assign(src, src_len);
				231	return true;
				232	}
				233
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	234	std::wstring UTF16ToWide(const string16& utf16) {
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	235	return utf16;
				236	}
				237
				238	bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
				239	output->assign(src, src_len);
				240	return true;
				241	}
				242
				243	#elif defined(WCHAR_T_IS_UTF32)
				244
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	245	string16 WideToUTF16(const std::wstring& wide) {
				246	string16 ret;
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	247	if (wide.empty())
				248	return ret;
				249
[email protected]	a31e79e	2008-08-07 22:36:01	[diff] [blame]	250	WideToUTF16(wide.data(), wide.length(), &ret);
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	251	return ret;
				252	}
				253
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	254	bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	255	if (src_len == 0) {
				256	output->clear();
				257	return true;
				258	}
				259
				260	// Assume that normally we won't have any non-BMP characters so the counts
				261	// will be the same.
				262	output->reserve(src_len);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	263	return ConvertUnicode<wchar_t, string16>(src, src_len, output);
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	264	}
				265
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	266	std::wstring UTF16ToWide(const string16& utf16) {
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	267	std::wstring ret;
				268	if (utf16.empty())
				269	return ret;
				270
[email protected]	a31e79e	2008-08-07 22:36:01	[diff] [blame]	271	UTF16ToWide(utf16.data(), utf16.length(), &ret);
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	272	return ret;
				273	}
				274
				275	bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
				276	if (src_len == 0) {
				277	output->clear();
				278	return true;
				279	}
				280
				281	// Assume that normally we won't have any non-BMP characters so the counts
				282	// will be the same.
				283	output->reserve(src_len);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	284	return ConvertUnicode<char16, std::wstring>(src, src_len, output);
[email protected]	e6da5e1f	2008-08-07 20:27:57	[diff] [blame]	285	}
				286
				287	#endif // defined(WCHAR_T_IS_UTF32)
				288
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	289	// UTF16 <-> UTF8 --------------------------------------------------------------
				290
				291	#if defined(WCHAR_T_IS_UTF32)
				292
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	293	bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	294	if (src_len == 0) {
				295	output->clear();
				296	return true;
				297	}
				298
				299	ReserveUTF16Or32Output(src, src_len, output);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	300	return ConvertUnicode<char, string16>(src, src_len, output);
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	301	}
				302
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	303	string16 UTF8ToUTF16(const std::string& utf8) {
				304	string16 ret;
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	305	if (utf8.empty())
				306	return ret;
				307
				308	// Ignore the success flag of this call, it will do the best it can for
				309	// invalid input, which is what we want here.
				310	UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
				311	return ret;
				312	}
				313
				314	bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
				315	if (src_len == 0) {
				316	output->clear();
				317	return true;
				318	}
				319
				320	ReserveUTF8Output(src, src_len, output);
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	321	return ConvertUnicode<char16, std::string>(src, src_len, output);
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	322	}
				323
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	324	std::string UTF16ToUTF8(const string16& utf16) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	325	std::string ret;
				326	if (utf16.empty())
				327	return ret;
				328
				329	// Ignore the success flag of this call, it will do the best it can for
				330	// invalid input, which is what we want here.
				331	UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
				332	return ret;
				333	}
				334
				335	#elif defined(WCHAR_T_IS_UTF16)
				336	// Easy case since we can use the "wide" versions we already wrote above.
				337
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	338	bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	339	return UTF8ToWide(src, src_len, output);
				340	}
				341
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	342	string16 UTF8ToUTF16(const std::string& utf8) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	343	return UTF8ToWide(utf8);
				344	}
				345
				346	bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
				347	return WideToUTF8(src, src_len, output);
				348	}
				349
[email protected]	d1370190	2008-08-27 20:57:35	[diff] [blame]	350	std::string UTF16ToUTF8(const string16& utf16) {
[email protected]	f0fcfd3	2008-08-26 19:27:24	[diff] [blame]	351	return WideToUTF8(utf16);
				352	}
				353
				354	#endif
				355
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	356	// Codepage <-> Wide -----------------------------------------------------------
				357
				358	// Convert a unicode string into the specified codepage_name. If the codepage
				359	// isn't found, return false.
				360	bool WideToCodepage(const std::wstring& wide,
				361	const char* codepage_name,
				362	OnStringUtilConversionError::Type on_error,
				363	std::string* encoded) {
				364	encoded->clear();
				365
				366	UErrorCode status = U_ZERO_ERROR;
				367	UConverter* converter = ucnv_open(codepage_name, &status);
				368	if (!U_SUCCESS(status))
				369	return false;
				370
				371	const UChar* uchar_src;
				372	int uchar_len;
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	373	#if defined(WCHAR_T_IS_UTF16)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	374	uchar_src = wide.c_str();
				375	uchar_len = static_cast<int>(wide.length());
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	376	#elif defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	377	// When wchar_t is wider than UChar (16 bits), transform \|wide\| into a
				378	// UChar* string. Size the UChar* buffer to be large enough to hold twice
[email protected]	703f427e	2008-08-13 01:17:18	[diff] [blame]	379	// as many UTF-16 code points as there are UTF-16 characters, in case each
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	380	// character translates to a UTF-16 surrogate pair, and leave room for a NUL
				381	// terminator.
				382	std::vector<UChar> wide_uchar(wide.length() * 2 + 1);
				383	u_strFromWCS(&wide_uchar[0], wide_uchar.size(), &uchar_len,
				384	wide.c_str(), wide.length(), &status);
				385	uchar_src = &wide_uchar[0];
				386	DCHECK(U_SUCCESS(status)) << "failed to convert wstring to UChar*";
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	387	#endif // defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	388
				389	int encoded_max_length = UCNV_GET_MAX_BYTES_FOR_STRING(uchar_len,
				390	ucnv_getMaxCharSize(converter));
				391	encoded->resize(encoded_max_length);
				392
				393	// Setup our error handler.
				394	switch (on_error) {
				395	case OnStringUtilConversionError::FAIL:
				396	ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
				397	NULL, NULL, &status);
				398	break;
				399	case OnStringUtilConversionError::SKIP:
				400	ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
				401	NULL, NULL, &status);
				402	break;
				403	default:
				404	NOTREACHED();
				405	}
				406
				407	// ucnv_fromUChars returns size not including terminating null
				408	int actual_size = ucnv_fromUChars(converter, &(*encoded)[0],
				409	encoded_max_length, uchar_src, uchar_len, &status);
				410	encoded->resize(actual_size);
				411	ucnv_close(converter);
				412	if (U_SUCCESS(status))
				413	return true;
				414	encoded->clear(); // Make sure the output is empty on error.
				415	return false;
				416	}
				417
				418	// Converts a string of the given codepage into unicode.
				419	// If the codepage isn't found, return false.
				420	bool CodepageToWide(const std::string& encoded,
				421	const char* codepage_name,
				422	OnStringUtilConversionError::Type on_error,
				423	std::wstring* wide) {
				424	wide->clear();
				425
				426	UErrorCode status = U_ZERO_ERROR;
				427	UConverter* converter = ucnv_open(codepage_name, &status);
				428	if (!U_SUCCESS(status))
				429	return false;
				430
				431	// The worst case is all the input characters are non-BMP (32-bit) ones.
				432	size_t uchar_max_length = encoded.length() * 2 + 1;
				433
				434	UChar* uchar_dst;
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	435	#if defined(WCHAR_T_IS_UTF16)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	436	uchar_dst = WriteInto(wide, uchar_max_length);
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	437	#elif defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	438	// When wchar_t is wider than UChar (16 bits), convert into a temporary
				439	// UChar* buffer.
				440	std::vector<UChar> wide_uchar(uchar_max_length);
				441	uchar_dst = &wide_uchar[0];
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	442	#endif // defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	443
				444	// Setup our error handler.
				445	switch (on_error) {
				446	case OnStringUtilConversionError::FAIL:
				447	ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, 0,
				448	NULL, NULL, &status);
				449	break;
				450	case OnStringUtilConversionError::SKIP:
				451	ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, 0,
				452	NULL, NULL, &status);
				453	break;
				454	default:
				455	NOTREACHED();
				456	}
				457
				458	int actual_size = ucnv_toUChars(converter,
				459	uchar_dst,
				460	static_cast<int>(uchar_max_length),
				461	encoded.data(),
				462	static_cast<int>(encoded.length()),
				463	&status);
				464	ucnv_close(converter);
				465	if (!U_SUCCESS(status)) {
				466	wide->clear(); // Make sure the output is empty on error.
				467	return false;
				468	}
				469
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	470	#ifdef WCHAR_T_IS_UTF32
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	471	// When wchar_t is wider than UChar (16 bits), it's not possible to wind up
				472	// with any more wchar_t elements than UChar elements. ucnv_toUChars
				473	// returns the number of UChar elements not including the NUL terminator, so
				474	// leave extra room for that.
				475	u_strToWCS(WriteInto(wide, actual_size + 1), actual_size + 1, &actual_size,
				476	uchar_dst, actual_size, &status);
				477	DCHECK(U_SUCCESS(status)) << "failed to convert UChar* to wstring";
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	478	#endif // WCHAR_T_IS_UTF32
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	479
				480	wide->resize(actual_size);
				481	return true;
				482	}
				483
				484	// Number formatting -----------------------------------------------------------
				485
[email protected]	8988699	2008-08-13 15:32:27	[diff] [blame]	486	namespace {
				487
				488	struct NumberFormatSingletonTraits
				489	: public DefaultSingletonTraits<NumberFormat> {
				490	static NumberFormat* New() {
				491	UErrorCode status = U_ZERO_ERROR;
				492	NumberFormat* formatter = NumberFormat::createInstance(status);
				493	DCHECK(U_SUCCESS(status));
				494	return formatter;
				495	}
				496	// There's no ICU call to destroy a NumberFormat object other than
				497	// operator delete, so use the default Delete, which calls operator delete.
				498	// This can cause problems if a different allocator is used by this file than
				499	// by ICU.
				500	};
				501
				502	} // namespace
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	503
				504	std::wstring FormatNumber(int64 number) {
[email protected]	8988699	2008-08-13 15:32:27	[diff] [blame]	505	NumberFormat* number_format =
				506	Singleton<NumberFormat, NumberFormatSingletonTraits>::get();
				507
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	508	if (!number_format) {
				509	// As a fallback, just return the raw number in a string.
				510	return StringPrintf(L"%lld", number);
				511	}
				512	UnicodeString ustr;
				513	number_format->format(number, ustr);
				514
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	515	#if defined(WCHAR_T_IS_UTF16)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	516	return std::wstring(ustr.getBuffer(),
				517	static_cast<std::wstring::size_type>(ustr.length()));
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	518	#elif defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	519	wchar_t buffer[64]; // A int64 is less than 20 chars long, so 64 chars
				520	// leaves plenty of room for formating stuff.
				521	int length = 0;
				522	UErrorCode error = U_ZERO_ERROR;
				523	u_strToWCS(buffer, 64, &length, ustr.getBuffer(), ustr.length() , &error);
				524	if (U_FAILURE(error)) {
				525	NOTREACHED();
				526	// As a fallback, just return the raw number in a string.
				527	return StringPrintf(L"%lld", number);
				528	}
				529	return std::wstring(buffer, static_cast<std::wstring::size_type>(length));
[email protected]	39be424	2008-08-07 18:31:40	[diff] [blame]	530	#endif // defined(WCHAR_T_IS_UTF32)
initial.commit	d7cae12	2008-07-26 21:49:38	[diff] [blame]	531	}
license.bot	bf09a50	2008-08-24 00:55:55	[diff] [blame]	532