[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 5 | #include "base/memory/scoped_ptr.h" |
[email protected] | 30fe1f9 | 2013-06-12 16:34:34 | [diff] [blame] | 6 | #include "base/strings/string_util.h" |
[email protected] | 74ebfb1 | 2013-06-07 20:48:00 | [diff] [blame] | 7 | #include "base/strings/utf_string_conversions.h" |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 8 | #include "content/common/android/address_parser.h" |
| 9 | #include "content/common/android/address_parser_internal.h" |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 10 | #include "testing/gtest/include/gtest/gtest.h" |
| 11 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 12 | using namespace content::address_parser; |
| 13 | using namespace content::address_parser::internal; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 14 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 15 | class AddressParserTest : public testing::Test { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 16 | public: |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 17 | AddressParserTest() {} |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 18 | |
[email protected] | 8790210c | 2013-12-02 05:29:53 | [diff] [blame] | 19 | void TokenizeWords(const base::string16& content, WordList* words) const { |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 20 | String16Tokenizer tokenizer(content.begin(), content.end(), |
[email protected] | 8790210c | 2013-12-02 05:29:53 | [diff] [blame] | 21 | base::kWhitespaceUTF16); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 22 | while (tokenizer.GetNext()) { |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 23 | words->push_back(Word(tokenizer.token_begin(), tokenizer.token_end())); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 24 | } |
| 25 | } |
| 26 | |
| 27 | std::string GetHouseNumber(const std::string& content) const { |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 28 | base::string16 content_16 = base::UTF8ToUTF16(content); |
[email protected] | fcf75d4 | 2013-12-03 20:11:26 | [diff] [blame] | 29 | base::string16 result; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 30 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 31 | HouseNumberParser parser; |
| 32 | Word word; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 33 | if (parser.Parse(content_16.begin(), content_16.end(), &word)) |
[email protected] | fcf75d4 | 2013-12-03 20:11:26 | [diff] [blame] | 34 | result = base::string16(word.begin, word.end); |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 35 | return base::UTF16ToUTF8(result); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 36 | } |
| 37 | |
| 38 | bool ContainsHouseNumber(const std::string& content) const { |
| 39 | return !GetHouseNumber(content).empty(); |
| 40 | } |
| 41 | |
| 42 | bool GetState(const std::string& state, size_t* state_index) const { |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 43 | base::string16 state_16 = base::UTF8ToUTF16(state); |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 44 | String16Tokenizer tokenizer(state_16.begin(), state_16.end(), |
[email protected] | 8790210c | 2013-12-02 05:29:53 | [diff] [blame] | 45 | base::kWhitespaceUTF16); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 46 | if (!tokenizer.GetNext()) |
| 47 | return false; |
| 48 | |
| 49 | size_t state_last_word; |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 50 | WordList words; |
| 51 | words.push_back(Word(tokenizer.token_begin(), tokenizer.token_end())); |
| 52 | return FindStateStartingInWord(&words, 0, &state_last_word, &tokenizer, |
| 53 | state_index); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | bool IsState(const std::string& state) const { |
| 57 | size_t state_index; |
| 58 | return GetState(state, &state_index); |
| 59 | } |
| 60 | |
| 61 | bool IsZipValid(const std::string& zip, const std::string& state) const { |
| 62 | size_t state_index; |
| 63 | EXPECT_TRUE(GetState(state, &state_index)); |
| 64 | |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 65 | base::string16 zip_16 = base::UTF8ToUTF16(zip); |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 66 | WordList words; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 67 | TokenizeWords(zip_16, &words); |
| 68 | EXPECT_TRUE(words.size() == 1); |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 69 | return ::IsZipValid(words.front(), state_index); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 70 | } |
| 71 | |
| 72 | bool IsLocationName(const std::string& street) const { |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 73 | base::string16 street_16 = base::UTF8ToUTF16(street); |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 74 | WordList words; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 75 | TokenizeWords(street_16, &words); |
| 76 | EXPECT_TRUE(words.size() == 1); |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 77 | return IsValidLocationName(words.front()); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 78 | } |
| 79 | |
| 80 | std::string FindAddress(const std::string& content) const { |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 81 | base::string16 content_16 = base::UTF8ToUTF16(content); |
[email protected] | fcf75d4 | 2013-12-03 20:11:26 | [diff] [blame] | 82 | base::string16 result_16; |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 83 | size_t start, end; |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 84 | if (::FindAddress(content_16.begin(), content_16.end(), &start, &end)) |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 85 | result_16 = content_16.substr(start, end - start); |
[email protected] | 3295612 | 2013-12-25 07:29:24 | [diff] [blame] | 86 | return base::UTF16ToUTF8(result_16); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 87 | } |
| 88 | |
| 89 | bool ContainsAddress(const std::string& content) const { |
| 90 | return !FindAddress(content).empty(); |
| 91 | } |
| 92 | |
| 93 | bool IsAddress(const std::string& content) const { |
| 94 | return FindAddress(content) == content; |
| 95 | } |
| 96 | |
| 97 | private: |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 98 | DISALLOW_COPY_AND_ASSIGN(AddressParserTest); |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 99 | }; |
| 100 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 101 | TEST_F(AddressParserTest, HouseNumber) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 102 | // Tests cases with valid home numbers. |
| 103 | EXPECT_EQ(GetHouseNumber("4 my house"), "4"); |
| 104 | EXPECT_EQ(GetHouseNumber("Something 4 my house"), "4"); |
| 105 | EXPECT_EQ(GetHouseNumber("4"), "4"); |
| 106 | EXPECT_EQ(GetHouseNumber(" 4,5"), "4"); |
| 107 | EXPECT_EQ(GetHouseNumber("one"), "one"); |
| 108 | EXPECT_EQ(GetHouseNumber("Number One somewhere"), "One"); |
| 109 | EXPECT_EQ(GetHouseNumber("Testing \n4\n"), "4"); |
| 110 | EXPECT_EQ(GetHouseNumber("Foo 1ST"), "1ST"); |
| 111 | EXPECT_EQ(GetHouseNumber("Bar 2nd"), "2nd"); |
| 112 | EXPECT_EQ(GetHouseNumber("Blah 3rd"), "3rd"); |
| 113 | EXPECT_EQ(GetHouseNumber("4th"), "4th"); |
| 114 | EXPECT_EQ(GetHouseNumber("Blah 11th"), "11th"); |
| 115 | EXPECT_EQ(GetHouseNumber("Blah 12th meh"), "12th"); |
| 116 | EXPECT_EQ(GetHouseNumber("Blah 13th moo"), "13th"); |
| 117 | EXPECT_EQ(GetHouseNumber("211st"), "211st"); |
| 118 | EXPECT_EQ(GetHouseNumber("1A"), "1A"); |
| 119 | EXPECT_EQ(GetHouseNumber("number:35"), "35"); |
| 120 | EXPECT_EQ(GetHouseNumber("five digits at most: 12345"), "12345"); |
| 121 | EXPECT_EQ(GetHouseNumber("'123'"), "123"); |
| 122 | EXPECT_EQ(GetHouseNumber("\"123\""), "123"); |
| 123 | EXPECT_EQ(GetHouseNumber("\"123, something\""), "123"); |
| 124 | EXPECT_EQ(GetHouseNumber("Testing 12-34"), "12-34"); |
| 125 | EXPECT_EQ(GetHouseNumber("Testing 12-34c,d"), "12-34c"); |
| 126 | EXPECT_EQ(GetHouseNumber("住所は:76 Buckingham Palace Roadです"), "76"); |
| 127 | |
| 128 | // Tests cases without valid home numbers. |
| 129 | EXPECT_FALSE(ContainsHouseNumber("0th")); |
| 130 | EXPECT_FALSE(ContainsHouseNumber("25st")); |
| 131 | EXPECT_FALSE(ContainsHouseNumber("111th")); |
| 132 | EXPECT_FALSE(ContainsHouseNumber("011th")); |
| 133 | EXPECT_FALSE(ContainsHouseNumber("27AZ")); |
| 134 | EXPECT_FALSE(ContainsHouseNumber("22ºC")); |
| 135 | EXPECT_FALSE(ContainsHouseNumber("3.141592")); |
| 136 | EXPECT_FALSE(ContainsHouseNumber("more than five digits: 123456")); |
| 137 | EXPECT_FALSE(ContainsHouseNumber("kjhdfkajsdhf98uf93h")); |
| 138 | EXPECT_FALSE(ContainsHouseNumber("これはテストです。")); |
| 139 | EXPECT_FALSE(ContainsHouseNumber("Number On")); |
| 140 | EXPECT_FALSE(ContainsHouseNumber("2: foo")); |
| 141 | EXPECT_FALSE(ContainsHouseNumber("12-")); |
| 142 | EXPECT_FALSE(ContainsHouseNumber("\n\"' \t, ")); |
| 143 | EXPECT_FALSE(ContainsHouseNumber("")); |
| 144 | } |
| 145 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 146 | TEST_F(AddressParserTest, FindState) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 147 | // The complete set of state codes and names is tested together with their |
| 148 | // returned state indices in the zip code test. |
| 149 | EXPECT_TRUE(IsState("CALIFORNIA")); |
| 150 | EXPECT_TRUE(IsState("ca")); |
| 151 | |
| 152 | EXPECT_FALSE(IsState("californi")); |
| 153 | EXPECT_FALSE(IsState("northern mariana")); |
| 154 | EXPECT_FALSE(IsState("northern mariana island")); |
| 155 | EXPECT_FALSE(IsState("zz")); |
| 156 | } |
| 157 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 158 | TEST_F(AddressParserTest, ZipCode) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 159 | EXPECT_TRUE(IsZipValid("90000", "CA")); |
| 160 | EXPECT_TRUE(IsZipValid("01234", "MA")); |
| 161 | EXPECT_TRUE(IsZipValid("99999-9999", "Alaska")); |
| 162 | |
| 163 | EXPECT_FALSE(IsZipValid("999999999", "Alaska")); |
| 164 | EXPECT_FALSE(IsZipValid("9999-99999", "Alaska")); |
| 165 | EXPECT_FALSE(IsZipValid("999999999-", "Alaska")); |
| 166 | EXPECT_FALSE(IsZipValid("99999-999a", "Alaska")); |
| 167 | EXPECT_FALSE(IsZipValid("99999--9999", "Alaska")); |
| 168 | EXPECT_FALSE(IsZipValid("90000o", "CA")); |
| 169 | EXPECT_FALSE(IsZipValid("01234", "CA")); |
| 170 | EXPECT_FALSE(IsZipValid("01234-", "MA")); |
| 171 | |
| 172 | // Test the state index against the zip range table. |
| 173 | EXPECT_TRUE(IsZipValid("99000", "AK")); |
| 174 | EXPECT_TRUE(IsZipValid("99000", "Alaska")); |
| 175 | EXPECT_TRUE(IsZipValid("35000", "AL")); |
| 176 | EXPECT_TRUE(IsZipValid("36000", "Alabama")); |
| 177 | EXPECT_TRUE(IsZipValid("71000", "AR")); |
| 178 | EXPECT_TRUE(IsZipValid("72000", "Arkansas")); |
| 179 | EXPECT_TRUE(IsZipValid("96000", "AS")); |
| 180 | EXPECT_TRUE(IsZipValid("96000", "American Samoa")); |
| 181 | EXPECT_TRUE(IsZipValid("85000", "AZ")); |
| 182 | EXPECT_TRUE(IsZipValid("86000", "Arizona")); |
| 183 | EXPECT_TRUE(IsZipValid("90000", "CA")); |
| 184 | EXPECT_TRUE(IsZipValid("96000", "California")); |
| 185 | EXPECT_TRUE(IsZipValid("80000", "CO")); |
| 186 | EXPECT_TRUE(IsZipValid("81000", "Colorado")); |
| 187 | EXPECT_TRUE(IsZipValid("06000", "CT")); |
| 188 | EXPECT_TRUE(IsZipValid("06000", "Connecticut")); |
| 189 | EXPECT_TRUE(IsZipValid("20000", "DC")); |
| 190 | EXPECT_TRUE(IsZipValid("20000", "District of Columbia")); |
| 191 | EXPECT_TRUE(IsZipValid("19000", "DE")); |
| 192 | EXPECT_TRUE(IsZipValid("19000", "Delaware")); |
| 193 | EXPECT_TRUE(IsZipValid("32000", "FL")); |
| 194 | EXPECT_TRUE(IsZipValid("34000", "Florida")); |
| 195 | EXPECT_TRUE(IsZipValid("96000", "FM")); |
| 196 | EXPECT_TRUE(IsZipValid("96000", "Federated States of Micronesia")); |
| 197 | EXPECT_TRUE(IsZipValid("30000", "GA")); |
| 198 | EXPECT_TRUE(IsZipValid("31000", "Georgia")); |
| 199 | EXPECT_TRUE(IsZipValid("96000", "GU")); |
| 200 | EXPECT_TRUE(IsZipValid("96000", "Guam")); |
| 201 | EXPECT_TRUE(IsZipValid("96000", "HI")); |
| 202 | EXPECT_TRUE(IsZipValid("96000", "Hawaii")); |
| 203 | EXPECT_TRUE(IsZipValid("50000", "IA")); |
| 204 | EXPECT_TRUE(IsZipValid("52000", "Iowa")); |
| 205 | EXPECT_TRUE(IsZipValid("83000", "ID")); |
| 206 | EXPECT_TRUE(IsZipValid("83000", "Idaho")); |
| 207 | EXPECT_TRUE(IsZipValid("60000", "IL")); |
| 208 | EXPECT_TRUE(IsZipValid("62000", "Illinois")); |
| 209 | EXPECT_TRUE(IsZipValid("46000", "IN")); |
| 210 | EXPECT_TRUE(IsZipValid("47000", "Indiana")); |
| 211 | EXPECT_TRUE(IsZipValid("66000", "KS")); |
| 212 | EXPECT_TRUE(IsZipValid("67000", "Kansas")); |
| 213 | EXPECT_TRUE(IsZipValid("40000", "KY")); |
| 214 | EXPECT_TRUE(IsZipValid("42000", "Kentucky")); |
| 215 | EXPECT_TRUE(IsZipValid("70000", "LA")); |
| 216 | EXPECT_TRUE(IsZipValid("71000", "Louisiana")); |
| 217 | EXPECT_TRUE(IsZipValid("01000", "MA")); |
| 218 | EXPECT_TRUE(IsZipValid("02000", "Massachusetts")); |
| 219 | EXPECT_TRUE(IsZipValid("20000", "MD")); |
| 220 | EXPECT_TRUE(IsZipValid("21000", "Maryland")); |
| 221 | EXPECT_TRUE(IsZipValid("03000", "ME")); |
| 222 | EXPECT_TRUE(IsZipValid("04000", "Maine")); |
| 223 | EXPECT_TRUE(IsZipValid("96000", "MH")); |
| 224 | EXPECT_TRUE(IsZipValid("96000", "Marshall Islands")); |
| 225 | EXPECT_TRUE(IsZipValid("48000", "MI")); |
| 226 | EXPECT_TRUE(IsZipValid("49000", "Michigan")); |
| 227 | EXPECT_TRUE(IsZipValid("55000", "MN")); |
| 228 | EXPECT_TRUE(IsZipValid("56000", "Minnesota")); |
| 229 | EXPECT_TRUE(IsZipValid("63000", "MO")); |
| 230 | EXPECT_TRUE(IsZipValid("65000", "Missouri")); |
| 231 | EXPECT_TRUE(IsZipValid("96000", "MP")); |
| 232 | EXPECT_TRUE(IsZipValid("96000", "Northern Mariana Islands")); |
| 233 | EXPECT_TRUE(IsZipValid("38000", "MS")); |
| 234 | EXPECT_TRUE(IsZipValid("39000", "Mississippi")); |
| 235 | EXPECT_TRUE(IsZipValid("55000", "MT")); |
| 236 | EXPECT_TRUE(IsZipValid("56000", "Montana")); |
| 237 | EXPECT_TRUE(IsZipValid("27000", "NC")); |
| 238 | EXPECT_TRUE(IsZipValid("28000", "North Carolina")); |
| 239 | EXPECT_TRUE(IsZipValid("58000", "ND")); |
| 240 | EXPECT_TRUE(IsZipValid("58000", "North Dakota")); |
| 241 | EXPECT_TRUE(IsZipValid("68000", "NE")); |
| 242 | EXPECT_TRUE(IsZipValid("69000", "Nebraska")); |
| 243 | EXPECT_TRUE(IsZipValid("03000", "NH")); |
| 244 | EXPECT_TRUE(IsZipValid("04000", "New Hampshire")); |
| 245 | EXPECT_TRUE(IsZipValid("07000", "NJ")); |
| 246 | EXPECT_TRUE(IsZipValid("08000", "New Jersey")); |
| 247 | EXPECT_TRUE(IsZipValid("87000", "NM")); |
| 248 | EXPECT_TRUE(IsZipValid("88000", "New Mexico")); |
| 249 | EXPECT_TRUE(IsZipValid("88000", "NV")); |
| 250 | EXPECT_TRUE(IsZipValid("89000", "Nevada")); |
| 251 | EXPECT_TRUE(IsZipValid("10000", "NY")); |
| 252 | EXPECT_TRUE(IsZipValid("14000", "New York")); |
| 253 | EXPECT_TRUE(IsZipValid("43000", "OH")); |
| 254 | EXPECT_TRUE(IsZipValid("45000", "Ohio")); |
| 255 | EXPECT_TRUE(IsZipValid("73000", "OK")); |
| 256 | EXPECT_TRUE(IsZipValid("74000", "Oklahoma")); |
| 257 | EXPECT_TRUE(IsZipValid("97000", "OR")); |
| 258 | EXPECT_TRUE(IsZipValid("97000", "Oregon")); |
| 259 | EXPECT_TRUE(IsZipValid("15000", "PA")); |
| 260 | EXPECT_TRUE(IsZipValid("19000", "Pennsylvania")); |
| 261 | EXPECT_TRUE(IsZipValid("06000", "PR")); |
| 262 | EXPECT_TRUE(IsZipValid("06000", "Puerto Rico")); |
| 263 | EXPECT_TRUE(IsZipValid("96000", "PW")); |
| 264 | EXPECT_TRUE(IsZipValid("96000", "Palau")); |
| 265 | EXPECT_TRUE(IsZipValid("02000", "RI")); |
| 266 | EXPECT_TRUE(IsZipValid("02000", "Rhode Island")); |
| 267 | EXPECT_TRUE(IsZipValid("29000", "SC")); |
| 268 | EXPECT_TRUE(IsZipValid("29000", "South Carolina")); |
| 269 | EXPECT_TRUE(IsZipValid("57000", "SD")); |
| 270 | EXPECT_TRUE(IsZipValid("57000", "South Dakota")); |
| 271 | EXPECT_TRUE(IsZipValid("37000", "TN")); |
| 272 | EXPECT_TRUE(IsZipValid("38000", "Tennessee")); |
| 273 | EXPECT_TRUE(IsZipValid("75000", "TX")); |
| 274 | EXPECT_TRUE(IsZipValid("79000", "Texas")); |
| 275 | EXPECT_TRUE(IsZipValid("84000", "UT")); |
| 276 | EXPECT_TRUE(IsZipValid("84000", "Utah")); |
| 277 | EXPECT_TRUE(IsZipValid("22000", "VA")); |
| 278 | EXPECT_TRUE(IsZipValid("24000", "Virginia")); |
| 279 | EXPECT_TRUE(IsZipValid("06000", "VI")); |
| 280 | EXPECT_TRUE(IsZipValid("09000", "Virgin Islands")); |
| 281 | EXPECT_TRUE(IsZipValid("05000", "VT")); |
| 282 | EXPECT_TRUE(IsZipValid("05000", "Vermont")); |
| 283 | EXPECT_TRUE(IsZipValid("98000", "WA")); |
| 284 | EXPECT_TRUE(IsZipValid("99000", "Washington")); |
| 285 | EXPECT_TRUE(IsZipValid("53000", "WI")); |
| 286 | EXPECT_TRUE(IsZipValid("54000", "Wisconsin")); |
| 287 | EXPECT_TRUE(IsZipValid("24000", "WV")); |
| 288 | EXPECT_TRUE(IsZipValid("26000", "West Virginia")); |
| 289 | EXPECT_TRUE(IsZipValid("82000", "WY")); |
| 290 | EXPECT_TRUE(IsZipValid("83000", "Wyoming")); |
| 291 | } |
| 292 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 293 | TEST_F(AddressParserTest, LocationName) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 294 | EXPECT_FALSE(IsLocationName("str-eet")); |
| 295 | EXPECT_FALSE(IsLocationName("somewhere")); |
| 296 | |
| 297 | // Test all supported street names and expected plural cases. |
| 298 | EXPECT_TRUE(IsLocationName("alley")); |
| 299 | EXPECT_TRUE(IsLocationName("annex")); |
| 300 | EXPECT_TRUE(IsLocationName("arcade")); |
| 301 | EXPECT_TRUE(IsLocationName("ave.")); |
| 302 | EXPECT_TRUE(IsLocationName("avenue")); |
| 303 | EXPECT_TRUE(IsLocationName("alameda")); |
| 304 | EXPECT_TRUE(IsLocationName("bayou")); |
| 305 | EXPECT_TRUE(IsLocationName("beach")); |
| 306 | EXPECT_TRUE(IsLocationName("bend")); |
| 307 | EXPECT_TRUE(IsLocationName("bluff")); |
| 308 | EXPECT_TRUE(IsLocationName("bluffs")); |
| 309 | EXPECT_TRUE(IsLocationName("bottom")); |
| 310 | EXPECT_TRUE(IsLocationName("boulevard")); |
| 311 | EXPECT_TRUE(IsLocationName("branch")); |
| 312 | EXPECT_TRUE(IsLocationName("bridge")); |
| 313 | EXPECT_TRUE(IsLocationName("brook")); |
| 314 | EXPECT_TRUE(IsLocationName("brooks")); |
| 315 | EXPECT_TRUE(IsLocationName("burg")); |
| 316 | EXPECT_TRUE(IsLocationName("burgs")); |
| 317 | EXPECT_TRUE(IsLocationName("bypass")); |
| 318 | EXPECT_TRUE(IsLocationName("broadway")); |
| 319 | EXPECT_TRUE(IsLocationName("camino")); |
| 320 | EXPECT_TRUE(IsLocationName("camp")); |
| 321 | EXPECT_TRUE(IsLocationName("canyon")); |
| 322 | EXPECT_TRUE(IsLocationName("cape")); |
| 323 | EXPECT_TRUE(IsLocationName("causeway")); |
| 324 | EXPECT_TRUE(IsLocationName("center")); |
| 325 | EXPECT_TRUE(IsLocationName("centers")); |
| 326 | EXPECT_TRUE(IsLocationName("circle")); |
| 327 | EXPECT_TRUE(IsLocationName("circles")); |
| 328 | EXPECT_TRUE(IsLocationName("cliff")); |
| 329 | EXPECT_TRUE(IsLocationName("cliffs")); |
| 330 | EXPECT_TRUE(IsLocationName("club")); |
| 331 | EXPECT_TRUE(IsLocationName("common")); |
| 332 | EXPECT_TRUE(IsLocationName("corner")); |
| 333 | EXPECT_TRUE(IsLocationName("corners")); |
| 334 | EXPECT_TRUE(IsLocationName("course")); |
| 335 | EXPECT_TRUE(IsLocationName("court")); |
| 336 | EXPECT_TRUE(IsLocationName("courts")); |
| 337 | EXPECT_TRUE(IsLocationName("cove")); |
| 338 | EXPECT_TRUE(IsLocationName("coves")); |
| 339 | EXPECT_TRUE(IsLocationName("creek")); |
| 340 | EXPECT_TRUE(IsLocationName("crescent")); |
| 341 | EXPECT_TRUE(IsLocationName("crest")); |
| 342 | EXPECT_TRUE(IsLocationName("crossing")); |
| 343 | EXPECT_TRUE(IsLocationName("crossroad")); |
| 344 | EXPECT_TRUE(IsLocationName("curve")); |
| 345 | EXPECT_TRUE(IsLocationName("circulo")); |
| 346 | EXPECT_TRUE(IsLocationName("dale")); |
| 347 | EXPECT_TRUE(IsLocationName("dam")); |
| 348 | EXPECT_TRUE(IsLocationName("divide")); |
| 349 | EXPECT_TRUE(IsLocationName("drive")); |
| 350 | EXPECT_TRUE(IsLocationName("drives")); |
| 351 | EXPECT_TRUE(IsLocationName("estate")); |
| 352 | EXPECT_TRUE(IsLocationName("estates")); |
| 353 | EXPECT_TRUE(IsLocationName("expressway")); |
| 354 | EXPECT_TRUE(IsLocationName("extension")); |
| 355 | EXPECT_TRUE(IsLocationName("extensions")); |
| 356 | EXPECT_TRUE(IsLocationName("fall")); |
| 357 | EXPECT_TRUE(IsLocationName("falls")); |
| 358 | EXPECT_TRUE(IsLocationName("ferry")); |
| 359 | EXPECT_TRUE(IsLocationName("field")); |
| 360 | EXPECT_TRUE(IsLocationName("fields")); |
| 361 | EXPECT_TRUE(IsLocationName("flat")); |
| 362 | EXPECT_TRUE(IsLocationName("flats")); |
| 363 | EXPECT_TRUE(IsLocationName("ford")); |
| 364 | EXPECT_TRUE(IsLocationName("fords")); |
| 365 | EXPECT_TRUE(IsLocationName("forest")); |
| 366 | EXPECT_TRUE(IsLocationName("forge")); |
| 367 | EXPECT_TRUE(IsLocationName("forges")); |
| 368 | EXPECT_TRUE(IsLocationName("fork")); |
| 369 | EXPECT_TRUE(IsLocationName("forks")); |
| 370 | EXPECT_TRUE(IsLocationName("fort")); |
| 371 | EXPECT_TRUE(IsLocationName("freeway")); |
| 372 | EXPECT_TRUE(IsLocationName("garden")); |
| 373 | EXPECT_TRUE(IsLocationName("gardens")); |
| 374 | EXPECT_TRUE(IsLocationName("gateway")); |
| 375 | EXPECT_TRUE(IsLocationName("glen")); |
| 376 | EXPECT_TRUE(IsLocationName("glens")); |
| 377 | EXPECT_TRUE(IsLocationName("green")); |
| 378 | EXPECT_TRUE(IsLocationName("greens")); |
| 379 | EXPECT_TRUE(IsLocationName("grove")); |
| 380 | EXPECT_TRUE(IsLocationName("groves")); |
| 381 | EXPECT_TRUE(IsLocationName("harbor")); |
| 382 | EXPECT_TRUE(IsLocationName("harbors")); |
| 383 | EXPECT_TRUE(IsLocationName("haven")); |
| 384 | EXPECT_TRUE(IsLocationName("heights")); |
| 385 | EXPECT_TRUE(IsLocationName("highway")); |
| 386 | EXPECT_TRUE(IsLocationName("hill")); |
| 387 | EXPECT_TRUE(IsLocationName("hills")); |
| 388 | EXPECT_TRUE(IsLocationName("hollow")); |
| 389 | EXPECT_TRUE(IsLocationName("inlet")); |
| 390 | EXPECT_TRUE(IsLocationName("island")); |
| 391 | EXPECT_TRUE(IsLocationName("islands")); |
| 392 | EXPECT_TRUE(IsLocationName("isle")); |
| 393 | EXPECT_TRUE(IsLocationName("junction")); |
| 394 | EXPECT_TRUE(IsLocationName("junctions")); |
| 395 | EXPECT_TRUE(IsLocationName("key")); |
| 396 | EXPECT_TRUE(IsLocationName("keys")); |
| 397 | EXPECT_TRUE(IsLocationName("knoll")); |
| 398 | EXPECT_TRUE(IsLocationName("knolls")); |
| 399 | EXPECT_TRUE(IsLocationName("lake")); |
| 400 | EXPECT_TRUE(IsLocationName("lakes")); |
| 401 | EXPECT_TRUE(IsLocationName("land")); |
| 402 | EXPECT_TRUE(IsLocationName("landing")); |
| 403 | EXPECT_TRUE(IsLocationName("lane")); |
| 404 | EXPECT_TRUE(IsLocationName("light")); |
| 405 | EXPECT_TRUE(IsLocationName("lights")); |
| 406 | EXPECT_TRUE(IsLocationName("loaf")); |
| 407 | EXPECT_TRUE(IsLocationName("lock")); |
| 408 | EXPECT_TRUE(IsLocationName("locks")); |
| 409 | EXPECT_TRUE(IsLocationName("lodge")); |
| 410 | EXPECT_TRUE(IsLocationName("loop")); |
| 411 | EXPECT_TRUE(IsLocationName("mall")); |
| 412 | EXPECT_TRUE(IsLocationName("manor")); |
| 413 | EXPECT_TRUE(IsLocationName("manors")); |
| 414 | EXPECT_TRUE(IsLocationName("meadow")); |
| 415 | EXPECT_TRUE(IsLocationName("meadows")); |
| 416 | EXPECT_TRUE(IsLocationName("mews")); |
| 417 | EXPECT_TRUE(IsLocationName("mill")); |
| 418 | EXPECT_TRUE(IsLocationName("mills")); |
| 419 | EXPECT_TRUE(IsLocationName("mission")); |
| 420 | EXPECT_TRUE(IsLocationName("motorway")); |
| 421 | EXPECT_TRUE(IsLocationName("mount")); |
| 422 | EXPECT_TRUE(IsLocationName("mountain")); |
| 423 | EXPECT_TRUE(IsLocationName("mountains")); |
| 424 | EXPECT_TRUE(IsLocationName("neck")); |
| 425 | EXPECT_TRUE(IsLocationName("orchard")); |
| 426 | EXPECT_TRUE(IsLocationName("oval")); |
| 427 | EXPECT_TRUE(IsLocationName("overpass")); |
| 428 | EXPECT_TRUE(IsLocationName("park")); |
| 429 | EXPECT_TRUE(IsLocationName("parks")); |
| 430 | EXPECT_TRUE(IsLocationName("parkway")); |
| 431 | EXPECT_TRUE(IsLocationName("parkways")); |
| 432 | EXPECT_TRUE(IsLocationName("pass")); |
| 433 | EXPECT_TRUE(IsLocationName("passage")); |
| 434 | EXPECT_TRUE(IsLocationName("path")); |
| 435 | EXPECT_TRUE(IsLocationName("pike")); |
| 436 | EXPECT_TRUE(IsLocationName("pine")); |
| 437 | EXPECT_TRUE(IsLocationName("pines")); |
| 438 | EXPECT_TRUE(IsLocationName("plain")); |
| 439 | EXPECT_TRUE(IsLocationName("plains")); |
| 440 | EXPECT_TRUE(IsLocationName("plaza")); |
| 441 | EXPECT_TRUE(IsLocationName("point")); |
| 442 | EXPECT_TRUE(IsLocationName("points")); |
| 443 | EXPECT_TRUE(IsLocationName("port")); |
| 444 | EXPECT_TRUE(IsLocationName("ports")); |
| 445 | EXPECT_TRUE(IsLocationName("prairie")); |
| 446 | EXPECT_TRUE(IsLocationName("privada")); |
| 447 | EXPECT_TRUE(IsLocationName("radial")); |
| 448 | EXPECT_TRUE(IsLocationName("ramp")); |
| 449 | EXPECT_TRUE(IsLocationName("ranch")); |
| 450 | EXPECT_TRUE(IsLocationName("rapid")); |
| 451 | EXPECT_TRUE(IsLocationName("rapids")); |
| 452 | EXPECT_TRUE(IsLocationName("rest")); |
| 453 | EXPECT_TRUE(IsLocationName("ridge")); |
| 454 | EXPECT_TRUE(IsLocationName("ridges")); |
| 455 | EXPECT_TRUE(IsLocationName("river")); |
| 456 | EXPECT_TRUE(IsLocationName("road")); |
| 457 | EXPECT_TRUE(IsLocationName("roads")); |
| 458 | EXPECT_TRUE(IsLocationName("route")); |
| 459 | EXPECT_TRUE(IsLocationName("row")); |
| 460 | EXPECT_TRUE(IsLocationName("rue")); |
| 461 | EXPECT_TRUE(IsLocationName("run")); |
| 462 | EXPECT_TRUE(IsLocationName("shoal")); |
| 463 | EXPECT_TRUE(IsLocationName("shoals")); |
| 464 | EXPECT_TRUE(IsLocationName("shore")); |
| 465 | EXPECT_TRUE(IsLocationName("shores")); |
| 466 | EXPECT_TRUE(IsLocationName("skyway")); |
| 467 | EXPECT_TRUE(IsLocationName("spring")); |
| 468 | EXPECT_TRUE(IsLocationName("springs")); |
| 469 | EXPECT_TRUE(IsLocationName("spur")); |
| 470 | EXPECT_TRUE(IsLocationName("spurs")); |
| 471 | EXPECT_TRUE(IsLocationName("square")); |
| 472 | EXPECT_TRUE(IsLocationName("squares")); |
| 473 | EXPECT_TRUE(IsLocationName("station")); |
| 474 | EXPECT_TRUE(IsLocationName("stravenue")); |
| 475 | EXPECT_TRUE(IsLocationName("stream")); |
| 476 | EXPECT_TRUE(IsLocationName("st.")); |
| 477 | EXPECT_TRUE(IsLocationName("street")); |
| 478 | EXPECT_TRUE(IsLocationName("streets")); |
| 479 | EXPECT_TRUE(IsLocationName("summit")); |
| 480 | EXPECT_TRUE(IsLocationName("speedway")); |
| 481 | EXPECT_TRUE(IsLocationName("terrace")); |
| 482 | EXPECT_TRUE(IsLocationName("throughway")); |
| 483 | EXPECT_TRUE(IsLocationName("trace")); |
| 484 | EXPECT_TRUE(IsLocationName("track")); |
| 485 | EXPECT_TRUE(IsLocationName("trafficway")); |
| 486 | EXPECT_TRUE(IsLocationName("trail")); |
| 487 | EXPECT_TRUE(IsLocationName("tunnel")); |
| 488 | EXPECT_TRUE(IsLocationName("turnpike")); |
| 489 | EXPECT_TRUE(IsLocationName("underpass")); |
| 490 | EXPECT_TRUE(IsLocationName("union")); |
| 491 | EXPECT_TRUE(IsLocationName("unions")); |
| 492 | EXPECT_TRUE(IsLocationName("valley")); |
| 493 | EXPECT_TRUE(IsLocationName("valleys")); |
| 494 | EXPECT_TRUE(IsLocationName("viaduct")); |
| 495 | EXPECT_TRUE(IsLocationName("view")); |
| 496 | EXPECT_TRUE(IsLocationName("views")); |
| 497 | EXPECT_TRUE(IsLocationName("village")); |
| 498 | EXPECT_TRUE(IsLocationName("villages")); |
| 499 | EXPECT_TRUE(IsLocationName("ville")); |
| 500 | EXPECT_TRUE(IsLocationName("vista")); |
| 501 | EXPECT_TRUE(IsLocationName("walk")); |
| 502 | EXPECT_TRUE(IsLocationName("walks")); |
| 503 | EXPECT_TRUE(IsLocationName("wall")); |
| 504 | EXPECT_TRUE(IsLocationName("way")); |
| 505 | EXPECT_TRUE(IsLocationName("ways")); |
| 506 | EXPECT_TRUE(IsLocationName("well")); |
| 507 | EXPECT_TRUE(IsLocationName("wells")); |
| 508 | EXPECT_TRUE(IsLocationName("xing")); |
| 509 | EXPECT_TRUE(IsLocationName("xrd")); |
| 510 | } |
| 511 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 512 | TEST_F(AddressParserTest, NumberPrefixCases) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 513 | EXPECT_EQ(FindAddress("Cafe 21\n750 Fifth Ave. San Diego, California 92101"), |
| 514 | "750 Fifth Ave. San Diego, California 92101"); |
| 515 | EXPECT_EQ(FindAddress( |
| 516 | "Century City 15\n 10250 Santa Monica Boulevard Los Angeles, CA 90067"), |
| 517 | "10250 Santa Monica Boulevard Los Angeles, CA 90067"); |
| 518 | EXPECT_EQ(FindAddress("123 45\n67 My Street, Somewhere, NY 10000"), |
| 519 | "67 My Street, Somewhere, NY 10000"); |
| 520 | EXPECT_TRUE(IsAddress("123 4th Avenue, Somewhere in NY 10000")); |
| 521 | } |
| 522 | |
[email protected] | be77471b | 2012-05-30 19:04:03 | [diff] [blame] | 523 | TEST_F(AddressParserTest, FullAddress) { |
[email protected] | c72652a | 2012-05-10 12:33:11 | [diff] [blame] | 524 | // Test US Google corporate addresses. Expects a full string match. |
| 525 | EXPECT_TRUE(IsAddress("1600 Amphitheatre Parkway Mountain View, CA 94043")); |
| 526 | EXPECT_TRUE(IsAddress("201 S. Division St. Suite 500 Ann Arbor, MI 48104")); |
| 527 | EXPECT_TRUE(ContainsAddress( |
| 528 | "Millennium at Midtown 10 10th Street NE Suite 600 Atlanta, GA 30309")); |
| 529 | EXPECT_TRUE(IsAddress( |
| 530 | "9606 North MoPac Expressway Suite 400 Austin, TX 78759")); |
| 531 | EXPECT_TRUE(IsAddress("2590 Pearl Street Suite 100 Boulder, CO 80302")); |
| 532 | EXPECT_TRUE(IsAddress("5 Cambridge Center, Floors 3-6 Cambridge, MA 02142")); |
| 533 | EXPECT_TRUE(IsAddress("410 Market St Suite 415 Chapel Hill, NC 27516")); |
| 534 | EXPECT_TRUE(IsAddress("20 West Kinzie St. Chicago, IL 60654")); |
| 535 | EXPECT_TRUE(IsAddress("114 Willits Street Birmingham, MI 48009")); |
| 536 | EXPECT_TRUE(IsAddress("19540 Jamboree Road 2nd Floor Irvine, CA 92612")); |
| 537 | EXPECT_TRUE(IsAddress("747 6th Street South, Kirkland, WA 98033")); |
| 538 | EXPECT_TRUE(IsAddress("301 S. Blount St. Suite 301 Madison, WI 53703")); |
| 539 | EXPECT_TRUE(IsAddress("76 Ninth Avenue 4th Floor New York, NY 10011")); |
| 540 | EXPECT_TRUE(ContainsAddress( |
| 541 | "Chelsea Markset Space, 75 Ninth Avenue 2nd and 4th Floors New York, \ |
| 542 | NY 10011")); |
| 543 | EXPECT_TRUE(IsAddress("6425 Penn Ave. Suite 700 Pittsburgh, PA 15206")); |
| 544 | EXPECT_TRUE(IsAddress("1818 Library Street Suite 400 Reston, VA 20190")); |
| 545 | EXPECT_TRUE(IsAddress("345 Spear Street Floors 2-4 San Francisco, CA 94105")); |
| 546 | EXPECT_TRUE(IsAddress("604 Arizona Avenue Santa Monica, CA 90401")); |
| 547 | EXPECT_TRUE(IsAddress("651 N. 34th St. Seattle, WA 98103")); |
| 548 | EXPECT_TRUE(IsAddress( |
| 549 | "1101 New York Avenue, N.W. Second Floor Washington, DC 20005")); |
| 550 | |
| 551 | // Other tests. |
| 552 | EXPECT_TRUE(IsAddress("57th Street and Lake Shore Drive\nChicago, IL 60637")); |
| 553 | EXPECT_TRUE(IsAddress("308 Congress Street Boston, MA 02210")); |
| 554 | EXPECT_TRUE(ContainsAddress( |
| 555 | "Central Park West at 79th Street, New York, NY, 10024-5192")); |
| 556 | EXPECT_TRUE(ContainsAddress( |
| 557 | "Lincoln Park | 100 34th Avenue • San Francisco, CA 94121 | 41575036")); |
| 558 | |
| 559 | EXPECT_EQ(FindAddress("Lorem ipsum dolor sit amet, consectetur adipisicing " \ |
| 560 | "elit, sed do 1600 Amphitheatre Parkway Mountain View, CA 94043 " \ |
| 561 | "eiusmod tempor incididunt ut labore et dolore magna aliqua."), |
| 562 | "1600 Amphitheatre Parkway Mountain View, CA 94043"); |
| 563 | |
| 564 | EXPECT_EQ(FindAddress("2590 Pearl Street Suite 100 Boulder, CO 80302 6425 " \ |
| 565 | "Penn Ave. Suite 700 Pittsburgh, PA 15206"), |
| 566 | "2590 Pearl Street Suite 100 Boulder, CO 80302"); |
| 567 | |
| 568 | EXPECT_TRUE(ContainsAddress( |
| 569 | "住所は 1600 Amphitheatre Parkway Mountain View, CA 94043 です。")); |
| 570 | |
| 571 | EXPECT_FALSE(ContainsAddress("1 st. too-short, CA 90000")); |
| 572 | EXPECT_TRUE(ContainsAddress("1 st. long enough, CA 90000")); |
| 573 | |
| 574 | EXPECT_TRUE(ContainsAddress("1 st. some city in al 35000")); |
| 575 | EXPECT_FALSE(ContainsAddress("1 book st Aquinas et al 35000")); |
| 576 | |
| 577 | EXPECT_FALSE(ContainsAddress("1 this comes too late: street, CA 90000")); |
| 578 | EXPECT_TRUE(ContainsAddress("1 this is ok: street, CA 90000")); |
| 579 | |
| 580 | EXPECT_FALSE(ContainsAddress( |
| 581 | "1 street I love verbosity, so I'm writing an address with too many " \ |
| 582 | "words CA 90000")); |
| 583 | EXPECT_TRUE(ContainsAddress("1 street 2 3 4 5 6 7 8 9 10 11 12, CA 90000")); |
| 584 | |
| 585 | EXPECT_TRUE(IsAddress("79th Street 1st Floor New York City, NY 10024-5192")); |
| 586 | |
| 587 | EXPECT_FALSE(ContainsAddress("123 Fake Street, Springfield, Springfield")); |
| 588 | EXPECT_FALSE(ContainsAddress("999 Street Avenue, City, ZZ 98765")); |
| 589 | EXPECT_FALSE(ContainsAddress("76 Here be dragons, CA 94043")); |
| 590 | EXPECT_FALSE(ContainsAddress("1 This, has, too* many, lines, to, be* valid")); |
| 591 | EXPECT_FALSE(ContainsAddress( |
| 592 | "1 Supercalifragilisticexpialidocious is too long, CA 90000")); |
| 593 | EXPECT_FALSE(ContainsAddress("")); |
| 594 | } |