Switch the offset conversion routines from an "offsets point at characters"
worldview to an "offsets point between characters" worldview.
This more closely aligns with how the omnibox autocomplete code (which is what
this was originally written for) expects things to behave.
Direct fallout from this change:
* An input offset of 0 will always map to an output offset of 0.
* An input offset of (length of string) will always map to the length of the
output string, instead of npos.
* It's possible for multiple distinct input offsets to map to a single non-npos
output offset, e.g. if they point to the start and end of a collapsed
sequence.
* Input offsets pointing into the middle of a completely-removed sequence might
not be set to npos if they fall on the boundaries of a subsequence processed
by the transformer. For example, when running FormatUrlWithOffsets() on
"http://user:pass@domain.com/" and directing it to omit both the scheme and
username/password, an input offset of "7" that points in between the scheme
and the username/password will be transformed to an output offset of 0
instead of npos.
Indirect fallout:
* A caller like SearchProvider::NavigationToMatch() will now mark certain
matches as "allowed to be default" that it didn't before. Specifically, if
the user's input string ends at the same point as the desired
|fill_into_edit|, the autocomplete offset will be calculated as (length of
string) instead of npos, and thus the match will be thought of as "inlinable"
and thus "allowed to be default".
BUG=284781
TEST=none
[email protected], [email protected]
Review URL: https://codereview.chromium.org/23619016
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222426 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/base/strings/utf_offset_string_conversions_unittest.cc b/base/strings/utf_offset_string_conversions_unittest.cc
index 5545c0d2..7626e4c 100644
--- a/base/strings/utf_offset_string_conversions_unittest.cc
+++ b/base/strings/utf_offset_string_conversions_unittest.cc
@@ -23,13 +23,16 @@
size_t input_offset;
size_t output_offset;
} utf8_to_utf16_cases[] = {
- {"", 0, kNpos},
+ {"", 0, 0},
+ {"", kNpos, kNpos},
{"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
{"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
{"\xed\xb0\x80z", 3, 1},
{"A\xF0\x90\x8C\x80z", 1, 1},
{"A\xF0\x90\x8C\x80z", 2, kNpos},
{"A\xF0\x90\x8C\x80z", 5, 3},
+ {"A\xF0\x90\x8C\x80z", 6, 4},
+ {"A\xF0\x90\x8C\x80z", kNpos, kNpos},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) {
size_t offset = utf8_to_utf16_cases[i].input_offset;
@@ -42,18 +45,22 @@
size_t input_offset;
size_t output_offset;
} utf16_to_utf8_cases[] = {
- {{}, 0, kNpos},
+ {{}, 0, 0},
// Converted to 3-byte utf-8 sequences
- {{0x5909, 0x63DB}, 2, kNpos},
+ {{0x5909, 0x63DB}, 3, kNpos},
+ {{0x5909, 0x63DB}, 2, 6},
{{0x5909, 0x63DB}, 1, 3},
+ {{0x5909, 0x63DB}, 0, 0},
// Converted to 2-byte utf-8 sequences
{{'A', 0x00bc, 0x00be, 'z'}, 1, 1},
{{'A', 0x00bc, 0x00be, 'z'}, 2, 3},
{{'A', 0x00bc, 0x00be, 'z'}, 3, 5},
+ {{'A', 0x00bc, 0x00be, 'z'}, 4, 6},
// Surrogate pair
{{'A', 0xd800, 0xdf00, 'z'}, 1, 1},
{{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos},
{{'A', 0xd800, 0xdf00, 'z'}, 3, 5},
+ {{'A', 0xd800, 0xdf00, 'z'}, 4, 6},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) {
size_t offset = utf16_to_utf8_cases[i].input_offset;
@@ -73,10 +80,10 @@
size_t unlimited_count = 0;
for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
++ti) {
- if (*ti < kLimit && *ti != kNpos)
+ if (*ti != kNpos)
++unlimited_count;
}
- EXPECT_EQ(10U, unlimited_count);
+ EXPECT_EQ(11U, unlimited_count);
// Reverse the values in the vector and try again.
size_ts.clear();
@@ -87,10 +94,10 @@
unlimited_count = 0;
for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
++ti) {
- if (*ti < kLimit && *ti != kNpos)
+ if (*ti != kNpos)
++unlimited_count;
}
- EXPECT_EQ(10U, unlimited_count);
+ EXPECT_EQ(11U, unlimited_count);
}
TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
@@ -99,13 +106,13 @@
// 1: abcXXXdef ==> abcXdef
{
std::vector<size_t> offsets;
- for (size_t t = 0; t < 9; ++t)
+ for (size_t t = 0; t <= 9; ++t)
offsets.push_back(t);
{
OffsetAdjuster offset_adjuster(&offsets);
offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1));
}
- size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
+ size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7};
EXPECT_EQ(offsets.size(), arraysize(expected_1));
for (size_t i = 0; i < arraysize(expected_1); ++i)
EXPECT_EQ(expected_1[i], offsets[i]);
@@ -114,7 +121,7 @@
// 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
{
std::vector<size_t> offsets;
- for (size_t t = 0; t < 23; ++t)
+ for (size_t t = 0; t <= 23; ++t)
offsets.push_back(t);
{
OffsetAdjuster offset_adjuster(&offsets);
@@ -123,9 +130,10 @@
offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4));
offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1));
}
- size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
- kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
- 13, kNpos, kNpos};
+ size_t expected_2[] = {
+ 0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos,
+ kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14
+ };
EXPECT_EQ(offsets.size(), arraysize(expected_2));
for (size_t i = 0; i < arraysize(expected_2); ++i)
EXPECT_EQ(expected_2[i], offsets[i]);
@@ -134,7 +142,7 @@
// 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
{
std::vector<size_t> offsets;
- for (size_t t = 0; t < 17; ++t)
+ for (size_t t = 0; t <= 17; ++t)
offsets.push_back(t);
{
OffsetAdjuster offset_adjuster(&offsets);
@@ -143,8 +151,10 @@
offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3));
offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0));
}
- size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
- 7, 8, kNpos, kNpos, 11, kNpos, kNpos};
+ size_t expected_3[] = {
+ 0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11,
+ 12, kNpos, 12
+ };
EXPECT_EQ(offsets.size(), arraysize(expected_3));
for (size_t i = 0; i < arraysize(expected_3); ++i)
EXPECT_EQ(expected_3[i], offsets[i]);