blob: f1c9d193f27f6d03acfe827faba05025d9f01cbf [file] [log] [blame]
[email protected]5a3b50a2011-09-16 19:48:321// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
avie3c62702015-12-24 06:52:195#include <stddef.h>
6
[email protected]5a3b50a2011-09-16 19:48:327#include <string>
[email protected]5a3b50a2011-09-16 19:48:328
9#include "base/i18n/rtl.h"
10#include "base/i18n/string_search.h"
[email protected]d1a5a2f2013-06-10 21:17:4011#include "base/strings/string16.h"
[email protected]a4ea1f12013-06-07 18:37:0712#include "base/strings/utf_string_conversions.h"
[email protected]5a3b50a2011-09-16 19:48:3213#include "testing/gtest/include/gtest/gtest.h"
[email protected]8bbf6192013-07-18 11:14:0414#include "third_party/icu/source/i18n/unicode/usearch.h"
[email protected]5a3b50a2011-09-16 19:48:3215
16namespace base {
17namespace i18n {
18
// Note on setting default locale for testing: The current default locale on
// the Mac trybot is en_US_POSIX, with which primary-level collation strength
// string search is case-sensitive, when normally it should be
// case-insensitive. In other locales (including en_US which English speakers
// in the U.S. use), this search would be case-insensitive as expected.
24
[email protected]a214fbc2011-09-19 17:52:5825TEST(StringSearchTest, ASCII) {
[email protected]5a3b50a2011-09-16 19:48:3226 std::string default_locale(uloc_getDefault());
27 bool locale_is_posix = (default_locale == "en_US_POSIX");
28 if (locale_is_posix)
29 SetICUDefaultLocale("en_US");
30
[email protected]4de88562012-09-09 16:54:5031 size_t index = 0;
32 size_t length = 0;
33
[email protected]5a3b50a2011-09-16 19:48:3234 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5035 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
36 EXPECT_EQ(0U, index);
37 EXPECT_EQ(5U, length);
[email protected]5a3b50a2011-09-16 19:48:3238
39 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5040 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),
41 &index, &length));
[email protected]5a3b50a2011-09-16 19:48:3242
43 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5044 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
45 EXPECT_EQ(4U, index);
46 EXPECT_EQ(6U, length);
[email protected]5a3b50a2011-09-16 19:48:3247
48 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5049 ASCIIToUTF16("searching within empty string"), string16(),
50 &index, &length));
[email protected]5a3b50a2011-09-16 19:48:3251
52 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5053 string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
54 EXPECT_EQ(0U, index);
55 EXPECT_EQ(0U, length);
[email protected]5a3b50a2011-09-16 19:48:3256
57 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5058 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
59 &index, &length));
60 EXPECT_EQ(0U, index);
61 EXPECT_EQ(18U, length);
[email protected]5a3b50a2011-09-16 19:48:3262
63 if (locale_is_posix)
64 SetICUDefaultLocale(default_locale.data());
65}
66
[email protected]a214fbc2011-09-19 17:52:5867TEST(StringSearchTest, UnicodeLocaleIndependent) {
[email protected]5a3b50a2011-09-16 19:48:3268 // Base characters
69 const string16 e_base = WideToUTF16(L"e");
70 const string16 E_base = WideToUTF16(L"E");
71 const string16 a_base = WideToUTF16(L"a");
72
73 // Composed characters
[email protected]4de88562012-09-09 16:54:5074 const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
75 const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
[email protected]5a3b50a2011-09-16 19:48:3276 const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
77 const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
[email protected]4de88562012-09-09 16:54:5078 const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
[email protected]5a3b50a2011-09-16 19:48:3279
80 // Decomposed characters
[email protected]4de88562012-09-09 16:54:5081 const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
82 const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
[email protected]5a3b50a2011-09-16 19:48:3283 const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
84 const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
[email protected]4de88562012-09-09 16:54:5085 const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
[email protected]5a3b50a2011-09-16 19:48:3286
87 std::string default_locale(uloc_getDefault());
88 bool locale_is_posix = (default_locale == "en_US_POSIX");
89 if (locale_is_posix)
90 SetICUDefaultLocale("en_US");
91
[email protected]4de88562012-09-09 16:54:5092 size_t index = 0;
93 size_t length = 0;
[email protected]5a3b50a2011-09-16 19:48:3294
95 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:5096 e_base, e_with_acute_accent, &index, &length));
97 EXPECT_EQ(0U, index);
98 EXPECT_EQ(e_with_acute_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:3299
100 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50101 e_with_acute_accent, e_base, &index, &length));
102 EXPECT_EQ(0U, index);
103 EXPECT_EQ(e_base.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32104
105 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50106 e_base, e_with_acute_combining_mark, &index, &length));
107 EXPECT_EQ(0U, index);
108 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32109
110 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50111 e_with_acute_combining_mark, e_base, &index, &length));
112 EXPECT_EQ(0U, index);
113 EXPECT_EQ(e_base.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32114
115 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50116 e_with_acute_combining_mark, e_with_acute_accent,
117 &index, &length));
118 EXPECT_EQ(0U, index);
119 EXPECT_EQ(e_with_acute_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32120
121 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50122 e_with_acute_accent, e_with_acute_combining_mark,
123 &index, &length));
124 EXPECT_EQ(0U, index);
125 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32126
127 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50128 e_with_acute_combining_mark, e_with_grave_combining_mark,
129 &index, &length));
130 EXPECT_EQ(0U, index);
131 EXPECT_EQ(e_with_grave_combining_mark.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32132
133 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50134 e_with_grave_combining_mark, e_with_acute_combining_mark,
135 &index, &length));
136 EXPECT_EQ(0U, index);
137 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32138
139 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50140 e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
141 EXPECT_EQ(0U, index);
142 EXPECT_EQ(e_with_grave_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32143
144 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50145 e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
146 EXPECT_EQ(0U, index);
147 EXPECT_EQ(e_with_acute_combining_mark.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32148
149 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50150 E_with_acute_accent, e_with_acute_accent, &index, &length));
151 EXPECT_EQ(0U, index);
152 EXPECT_EQ(e_with_acute_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32153
154 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50155 E_with_grave_accent, e_with_acute_accent, &index, &length));
156 EXPECT_EQ(0U, index);
157 EXPECT_EQ(e_with_acute_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32158
159 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50160 E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
161 EXPECT_EQ(0U, index);
162 EXPECT_EQ(e_with_grave_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32163
164 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50165 E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
166 EXPECT_EQ(0U, index);
167 EXPECT_EQ(e_with_acute_accent.size(), length);
168
169 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
170 E_base, e_with_grave_accent, &index, &length));
171 EXPECT_EQ(0U, index);
172 EXPECT_EQ(e_with_grave_accent.size(), length);
[email protected]5a3b50a2011-09-16 19:48:32173
174 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50175 a_with_acute_accent, e_with_acute_accent, &index, &length));
[email protected]5a3b50a2011-09-16 19:48:32176
177 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50178 a_with_acute_combining_mark, e_with_acute_combining_mark,
179 &index, &length));
[email protected]5a3b50a2011-09-16 19:48:32180
181 if (locale_is_posix)
182 SetICUDefaultLocale(default_locale.data());
183}
184
[email protected]a214fbc2011-09-19 17:52:58185TEST(StringSearchTest, UnicodeLocaleDependent) {
[email protected]5a3b50a2011-09-16 19:48:32186 // Base characters
187 const string16 a_base = WideToUTF16(L"a");
188
189 // Composed characters
190 const string16 a_with_ring = WideToUTF16(L"\u00e5");
191
192 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50193 a_base, a_with_ring, NULL, NULL));
[email protected]5a3b50a2011-09-16 19:48:32194
195 const char* default_locale = uloc_getDefault();
196 SetICUDefaultLocale("da");
197
198 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
[email protected]4de88562012-09-09 16:54:50199 a_base, a_with_ring, NULL, NULL));
[email protected]5a3b50a2011-09-16 19:48:32200
201 SetICUDefaultLocale(default_locale);
202}
203
[email protected]df69c342013-07-11 09:32:31204TEST(StringSearchTest, FixedPatternMultipleSearch) {
205 std::string default_locale(uloc_getDefault());
206 bool locale_is_posix = (default_locale == "en_US_POSIX");
207 if (locale_is_posix)
208 SetICUDefaultLocale("en_US");
209
210 size_t index = 0;
211 size_t length = 0;
212
213 // Search "hello" over multiple texts.
214 FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));
215 EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
216 EXPECT_EQ(2U, index);
217 EXPECT_EQ(5U, length);
218 EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));
219 EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
220 EXPECT_EQ(0U, index);
221 EXPECT_EQ(5U, length);
222
223 if (locale_is_posix)
224 SetICUDefaultLocale(default_locale.data());
225}
226
[email protected]5a3b50a2011-09-16 19:48:32227} // namespace i18n
228} // namespace base