[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
avi | e3c6270 | 2015-12-24 06:52:19 | [diff] [blame] | 5 | #include <stddef.h> |
| 6 | |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 7 | #include <string> |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 8 | |
| 9 | #include "base/i18n/rtl.h" |
| 10 | #include "base/i18n/string_search.h" |
[email protected] | d1a5a2f | 2013-06-10 21:17:40 | [diff] [blame] | 11 | #include "base/strings/string16.h" |
[email protected] | a4ea1f1 | 2013-06-07 18:37:07 | [diff] [blame] | 12 | #include "base/strings/utf_string_conversions.h" |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 13 | #include "testing/gtest/include/gtest/gtest.h" |
[email protected] | 8bbf619 | 2013-07-18 11:14:04 | [diff] [blame] | 14 | #include "third_party/icu/source/i18n/unicode/usearch.h" |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 15 | |
| 16 | namespace base { |
| 17 | namespace i18n { |
| 18 | |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 19 | // Note on setting default locale for testing: The current default locale on |
| 20 | // the Mac trybot is en_US_POSIX, with which primary-level collation strength |
| 21 | // string search is case-sensitive, when normally it should be |
| 22 | // case-insensitive. In other locales (including en_US which English speakers |
| 23 | // in the U.S. use), this search would be case-insensitive as expected. |
| 24 | |
[email protected] | a214fbc | 2011-09-19 17:52:58 | [diff] [blame] | 25 | TEST(StringSearchTest, ASCII) { |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 26 | std::string default_locale(uloc_getDefault()); |
| 27 | bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| 28 | if (locale_is_posix) |
| 29 | SetICUDefaultLocale("en_US"); |
| 30 | |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 31 | size_t index = 0; |
| 32 | size_t length = 0; |
| 33 | |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 34 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 35 | ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); |
| 36 | EXPECT_EQ(0U, index); |
| 37 | EXPECT_EQ(5U, length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 38 | |
| 39 | EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 40 | ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), |
| 41 | &index, &length)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 42 | |
| 43 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 44 | ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); |
| 45 | EXPECT_EQ(4U, index); |
| 46 | EXPECT_EQ(6U, length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 47 | |
| 48 | EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 49 | ASCIIToUTF16("searching within empty string"), string16(), |
| 50 | &index, &length)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 51 | |
| 52 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 53 | string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); |
| 54 | EXPECT_EQ(0U, index); |
| 55 | EXPECT_EQ(0U, length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 56 | |
| 57 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 58 | ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), |
| 59 | &index, &length)); |
| 60 | EXPECT_EQ(0U, index); |
| 61 | EXPECT_EQ(18U, length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 62 | |
| 63 | if (locale_is_posix) |
| 64 | SetICUDefaultLocale(default_locale.data()); |
| 65 | } |
| 66 | |
[email protected] | a214fbc | 2011-09-19 17:52:58 | [diff] [blame] | 67 | TEST(StringSearchTest, UnicodeLocaleIndependent) { |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 68 | // Base characters |
| 69 | const string16 e_base = WideToUTF16(L"e"); |
| 70 | const string16 E_base = WideToUTF16(L"E"); |
| 71 | const string16 a_base = WideToUTF16(L"a"); |
| 72 | |
| 73 | // Composed characters |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 74 | const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); |
| 75 | const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 76 | const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); |
| 77 | const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 78 | const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 79 | |
| 80 | // Decomposed characters |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 81 | const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); |
| 82 | const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 83 | const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); |
| 84 | const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 85 | const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 86 | |
| 87 | std::string default_locale(uloc_getDefault()); |
| 88 | bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| 89 | if (locale_is_posix) |
| 90 | SetICUDefaultLocale("en_US"); |
| 91 | |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 92 | size_t index = 0; |
| 93 | size_t length = 0; |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 94 | |
| 95 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 96 | e_base, e_with_acute_accent, &index, &length)); |
| 97 | EXPECT_EQ(0U, index); |
| 98 | EXPECT_EQ(e_with_acute_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 99 | |
| 100 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 101 | e_with_acute_accent, e_base, &index, &length)); |
| 102 | EXPECT_EQ(0U, index); |
| 103 | EXPECT_EQ(e_base.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 104 | |
| 105 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 106 | e_base, e_with_acute_combining_mark, &index, &length)); |
| 107 | EXPECT_EQ(0U, index); |
| 108 | EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 109 | |
| 110 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 111 | e_with_acute_combining_mark, e_base, &index, &length)); |
| 112 | EXPECT_EQ(0U, index); |
| 113 | EXPECT_EQ(e_base.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 114 | |
| 115 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 116 | e_with_acute_combining_mark, e_with_acute_accent, |
| 117 | &index, &length)); |
| 118 | EXPECT_EQ(0U, index); |
| 119 | EXPECT_EQ(e_with_acute_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 120 | |
| 121 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 122 | e_with_acute_accent, e_with_acute_combining_mark, |
| 123 | &index, &length)); |
| 124 | EXPECT_EQ(0U, index); |
| 125 | EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 126 | |
| 127 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 128 | e_with_acute_combining_mark, e_with_grave_combining_mark, |
| 129 | &index, &length)); |
| 130 | EXPECT_EQ(0U, index); |
| 131 | EXPECT_EQ(e_with_grave_combining_mark.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 132 | |
| 133 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 134 | e_with_grave_combining_mark, e_with_acute_combining_mark, |
| 135 | &index, &length)); |
| 136 | EXPECT_EQ(0U, index); |
| 137 | EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 138 | |
| 139 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 140 | e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); |
| 141 | EXPECT_EQ(0U, index); |
| 142 | EXPECT_EQ(e_with_grave_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 143 | |
| 144 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 145 | e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); |
| 146 | EXPECT_EQ(0U, index); |
| 147 | EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 148 | |
| 149 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 150 | E_with_acute_accent, e_with_acute_accent, &index, &length)); |
| 151 | EXPECT_EQ(0U, index); |
| 152 | EXPECT_EQ(e_with_acute_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 153 | |
| 154 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 155 | E_with_grave_accent, e_with_acute_accent, &index, &length)); |
| 156 | EXPECT_EQ(0U, index); |
| 157 | EXPECT_EQ(e_with_acute_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 158 | |
| 159 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 160 | E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); |
| 161 | EXPECT_EQ(0U, index); |
| 162 | EXPECT_EQ(e_with_grave_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 163 | |
| 164 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 165 | E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); |
| 166 | EXPECT_EQ(0U, index); |
| 167 | EXPECT_EQ(e_with_acute_accent.size(), length); |
| 168 | |
| 169 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| 170 | E_base, e_with_grave_accent, &index, &length)); |
| 171 | EXPECT_EQ(0U, index); |
| 172 | EXPECT_EQ(e_with_grave_accent.size(), length); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 173 | |
| 174 | EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 175 | a_with_acute_accent, e_with_acute_accent, &index, &length)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 176 | |
| 177 | EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 178 | a_with_acute_combining_mark, e_with_acute_combining_mark, |
| 179 | &index, &length)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 180 | |
| 181 | if (locale_is_posix) |
| 182 | SetICUDefaultLocale(default_locale.data()); |
| 183 | } |
| 184 | |
[email protected] | a214fbc | 2011-09-19 17:52:58 | [diff] [blame] | 185 | TEST(StringSearchTest, UnicodeLocaleDependent) { |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 186 | // Base characters |
| 187 | const string16 a_base = WideToUTF16(L"a"); |
| 188 | |
| 189 | // Composed characters |
| 190 | const string16 a_with_ring = WideToUTF16(L"\u00e5"); |
| 191 | |
| 192 | EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 193 | a_base, a_with_ring, NULL, NULL)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 194 | |
| 195 | const char* default_locale = uloc_getDefault(); |
| 196 | SetICUDefaultLocale("da"); |
| 197 | |
| 198 | EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
[email protected] | 4de8856 | 2012-09-09 16:54:50 | [diff] [blame] | 199 | a_base, a_with_ring, NULL, NULL)); |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 200 | |
| 201 | SetICUDefaultLocale(default_locale); |
| 202 | } |
| 203 | |
[email protected] | df69c34 | 2013-07-11 09:32:31 | [diff] [blame] | 204 | TEST(StringSearchTest, FixedPatternMultipleSearch) { |
| 205 | std::string default_locale(uloc_getDefault()); |
| 206 | bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| 207 | if (locale_is_posix) |
| 208 | SetICUDefaultLocale("en_US"); |
| 209 | |
| 210 | size_t index = 0; |
| 211 | size_t length = 0; |
| 212 | |
| 213 | // Search "hello" over multiple texts. |
| 214 | FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); |
| 215 | EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); |
| 216 | EXPECT_EQ(2U, index); |
| 217 | EXPECT_EQ(5U, length); |
| 218 | EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); |
| 219 | EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); |
| 220 | EXPECT_EQ(0U, index); |
| 221 | EXPECT_EQ(5U, length); |
| 222 | |
| 223 | if (locale_is_posix) |
| 224 | SetICUDefaultLocale(default_locale.data()); |
| 225 | } |
| 226 | |
[email protected] | 5a3b50a | 2011-09-16 19:48:32 | [diff] [blame] | 227 | } // namespace i18n |
| 228 | } // namespace base |