Blame - base/utf_offset_string_conversions.cc - chromium/src.git

blob: 4c47ef8e8cef3c43b3289e338442889ecf312542 [file] [log] [blame]

[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	1	// Copyright (c) 2009 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/utf_offset_string_conversions.h"
				6
				7	#include "base/string_piece.h"
				8	#include "base/utf_string_conversion_utils.h"
				9
				10	using base::PrepareForUTF16Or32Output;
				11	using base::ReadUnicodeCharacter;
				12	using base::WriteUnicodeCharacter;
				13
				14	// Generalized Unicode converter -----------------------------------------------
				15
				16	// Converts the given source Unicode character type to the given destination
				17	// Unicode character type as a STL string. The given input buffer and size
				18	// determine the source, and the given output STL string will be replaced by
				19	// the result.
				20	template<typename SRC_CHAR>
				21	bool ConvertUnicode(const SRC_CHAR* src,
				22	size_t src_len,
				23	std::wstring* output,
				24	size_t* offset_for_adjustment) {
				25	size_t output_offset =
				26	(offset_for_adjustment && *offset_for_adjustment < src_len) ?
				27	*offset_for_adjustment : std::wstring::npos;
				28
				29	// ICU requires 32-bit numbers.
				30	bool success = true;
				31	int32 src_len32 = static_cast<int32>(src_len);
				32	for (int32 i = 0; i < src_len32; i++) {
				33	uint32 code_point;
				34	size_t original_i = i;
				35	size_t chars_written = 0;
				36	if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
				37	chars_written = WriteUnicodeCharacter(code_point, output);
				38	} else {
[email protected]	d7a3e8e	2010-01-01 22:16:38	[diff] [blame^]	39	chars_written = WriteUnicodeCharacter(0xFFFD, output);
[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame]	40	success = false;
				41	}
				42	if ((output_offset != std::wstring::npos) &&
				43	(*offset_for_adjustment > original_i)) {
				44	// NOTE: ReadUnicodeCharacter() adjusts \|i\| to point _at_ the last
				45	// character read, not after it (so that incrementing it in the loop
				46	// increment will place it at the right location), so we need to account
				47	// for that in determining the amount that was read.
				48	if (*offset_for_adjustment <= static_cast<size_t>(i))
				49	output_offset = std::wstring::npos;
				50	else
				51	output_offset += chars_written - (i - original_i + 1);
				52	}
				53	}
				54
				55	if (offset_for_adjustment)
				56	*offset_for_adjustment = output_offset;
				57	return success;
				58	}
				59
				60	// UTF-8 <-> Wide --------------------------------------------------------------
				61
				62	bool UTF8ToWideAndAdjustOffset(const char* src,
				63	size_t src_len,
				64	std::wstring* output,
				65	size_t* offset_for_adjustment) {
				66	PrepareForUTF16Or32Output(src, src_len, output);
				67	return ConvertUnicode(src, src_len, output, offset_for_adjustment);
				68	}
				69
				70	std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
				71	size_t* offset_for_adjustment) {
				72	std::wstring ret;
				73	UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
				74	offset_for_adjustment);
				75	return ret;
				76	}
				77
				78	// UTF-16 <-> Wide -------------------------------------------------------------
				79
				80	#if defined(WCHAR_T_IS_UTF16)
				81
				82	// When wide == UTF-16, then conversions are a NOP.
				83	bool UTF16ToWideAndAdjustOffset(const char16* src,
				84	size_t src_len,
				85	std::wstring* output,
				86	size_t* offset_for_adjustment) {
				87	output->assign(src, src_len);
				88	if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
				89	*offset_for_adjustment = std::wstring::npos;
				90	return true;
				91	}
				92
				93	std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
				94	size_t* offset_for_adjustment) {
				95	if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
				96	*offset_for_adjustment = std::wstring::npos;
				97	return utf16;
				98	}
				99
				100	#elif defined(WCHAR_T_IS_UTF32)
				101
				102	bool UTF16ToWideAndAdjustOffset(const char16* src,
				103	size_t src_len,
				104	std::wstring* output,
				105	size_t* offset_for_adjustment) {
				106	output->clear();
				107	// Assume that normally we won't have any non-BMP characters so the counts
				108	// will be the same.
				109	output->reserve(src_len);
				110	return ConvertUnicode(src, src_len, output, offset_for_adjustment);
				111	}
				112
				113	std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
				114	size_t* offset_for_adjustment) {
				115	std::wstring ret;
				116	UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
				117	offset_for_adjustment);
				118	return ret;
				119	}
				120
				121	#endif // defined(WCHAR_T_IS_UTF32)