Blame - base/utf_offset_string_conversions_unittest.cc - chromium/src.git

blob: 00d87d3b8d7a303e40165ba90da9e6c9baf109cb [file] [log] [blame]

[email protected]	b9f9383	2009-11-13 19:27:48	[diff] [blame^]	1	// Copyright (c) 2009 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/logging.h"
				6	#include "base/string_piece.h"
				7	#include "base/utf_offset_string_conversions.h"
				8	#include "testing/gtest/include/gtest/gtest.h"
				9
				10	namespace base {
				11
				12	namespace {
				13
				14	// Given a null-terminated string of wchar_t with each wchar_t representing
				15	// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
				16	// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
				17	// should be represented as a surrogate pair (two UTF-16 units)
				18	// even where wchar_t is 32-bit (Linux and Mac).
				19	//
				20	// This is to help write tests for functions with string16 params until
				21	// the C++ 0x UTF-16 literal is well-supported by compilers.
				22	string16 BuildString16(const wchar_t* s) {
				23	#if defined(WCHAR_T_IS_UTF16)
				24	return string16(s);
				25	#elif defined(WCHAR_T_IS_UTF32)
				26	string16 u16;
				27	while (*s != 0) {
				28	DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
				29	u16.push_back(*s++);
				30	}
				31	return u16;
				32	#endif
				33	}
				34
				35	} // namespace
				36
				37	TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
				38	struct UTF8ToWideCase {
				39	const char* utf8;
				40	size_t input_offset;
				41	size_t output_offset;
				42	} utf8_to_wide_cases[] = {
				43	{"", 0, std::wstring::npos},
				44	{"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos},
				45	{"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
				46	{"\xed\xb0\x80z", 3, 0},
				47	{"A\xF0\x90\x8C\x80z", 1, 1},
				48	{"A\xF0\x90\x8C\x80z", 2, std::wstring::npos},
				49	#if defined(WCHAR_T_IS_UTF16)
				50	{"A\xF0\x90\x8C\x80z", 5, 3},
				51	#elif defined(WCHAR_T_IS_UTF32)
				52	{"A\xF0\x90\x8C\x80z", 5, 2},
				53	#endif
				54	};
				55	for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_wide_cases); ++i) {
				56	size_t offset = utf8_to_wide_cases[i].input_offset;
				57	UTF8ToWideAndAdjustOffset(utf8_to_wide_cases[i].utf8, &offset);
				58	EXPECT_EQ(utf8_to_wide_cases[i].output_offset, offset);
				59	}
				60
				61	#if defined(WCHAR_T_IS_UTF32)
				62	struct UTF16ToWideCase {
				63	const wchar_t* wide;
				64	size_t input_offset;
				65	size_t output_offset;
				66	} utf16_to_wide_cases[] = {
				67	{L"\xD840\xDC00\x4E00", 0, 0},
				68	{L"\xD840\xDC00\x4E00", 1, std::wstring::npos},
				69	{L"\xD840\xDC00\x4E00", 2, 1},
				70	};
				71	for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) {
				72	size_t offset = utf16_to_wide_cases[i].input_offset;
				73	UTF16ToWideAndAdjustOffset(BuildString16(utf16_to_wide_cases[i].wide),
				74	&offset);
				75	EXPECT_EQ(utf16_to_wide_cases[i].output_offset, offset);
				76	}
				77	#endif
				78	}
				79
				80	} // namespace base