blob: 00d87d3b8d7a303e40165ba90da9e6c9baf109cb [file] [log] [blame]
[email protected]b9f93832009-11-13 19:27:481// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/logging.h"
6#include "base/string_piece.h"
7#include "base/utf_offset_string_conversions.h"
8#include "testing/gtest/include/gtest/gtest.h"
9
10namespace base {
11
12namespace {
13
14// Given a null-terminated string of wchar_t with each wchar_t representing
15// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
16// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
17// should be represented as a surrogate pair (two UTF-16 units)
18// *even* where wchar_t is 32-bit (Linux and Mac).
19//
20// This is to help write tests for functions with string16 params until
21// the C++ 0x UTF-16 literal is well-supported by compilers.
22string16 BuildString16(const wchar_t* s) {
23#if defined(WCHAR_T_IS_UTF16)
24 return string16(s);
25#elif defined(WCHAR_T_IS_UTF32)
26 string16 u16;
27 while (*s != 0) {
28 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
29 u16.push_back(*s++);
30 }
31 return u16;
32#endif
33}
34
35} // namespace
36
37TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
38 struct UTF8ToWideCase {
39 const char* utf8;
40 size_t input_offset;
41 size_t output_offset;
42 } utf8_to_wide_cases[] = {
43 {"", 0, std::wstring::npos},
44 {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos},
45 {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
46 {"\xed\xb0\x80z", 3, 0},
47 {"A\xF0\x90\x8C\x80z", 1, 1},
48 {"A\xF0\x90\x8C\x80z", 2, std::wstring::npos},
49#if defined(WCHAR_T_IS_UTF16)
50 {"A\xF0\x90\x8C\x80z", 5, 3},
51#elif defined(WCHAR_T_IS_UTF32)
52 {"A\xF0\x90\x8C\x80z", 5, 2},
53#endif
54 };
55 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_wide_cases); ++i) {
56 size_t offset = utf8_to_wide_cases[i].input_offset;
57 UTF8ToWideAndAdjustOffset(utf8_to_wide_cases[i].utf8, &offset);
58 EXPECT_EQ(utf8_to_wide_cases[i].output_offset, offset);
59 }
60
61#if defined(WCHAR_T_IS_UTF32)
62 struct UTF16ToWideCase {
63 const wchar_t* wide;
64 size_t input_offset;
65 size_t output_offset;
66 } utf16_to_wide_cases[] = {
67 {L"\xD840\xDC00\x4E00", 0, 0},
68 {L"\xD840\xDC00\x4E00", 1, std::wstring::npos},
69 {L"\xD840\xDC00\x4E00", 2, 1},
70 };
71 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) {
72 size_t offset = utf16_to_wide_cases[i].input_offset;
73 UTF16ToWideAndAdjustOffset(BuildString16(utf16_to_wide_cases[i].wide),
74 &offset);
75 EXPECT_EQ(utf16_to_wide_cases[i].output_offset, offset);
76 }
77#endif
78}
79
80} // namaspace base