// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
| 29 | |
| 30 | #include <stdlib.h> |
| 31 | #include <windows.h> |
| 32 | |
| 33 | #include "base/basictypes.h" |
| 34 | #include "base/logging.h" |
| 35 | #include "base/path_service.h" |
| 36 | #include "base/string_util.h" |
| 37 | #include "chrome/browser/url_fixer_upper.h" |
| 38 | #include "chrome/common/chrome_paths.h" |
| 39 | #include "googleurl/src/url_parse.h" |
| 40 | #include "googleurl/src/gurl.h" |
| 41 | #include "net/base/net_util.h" |
| 42 | #include "testing/gtest/include/gtest/gtest.h" |
| 43 | |
| 44 | namespace { |
| 45 | class URLFixerUpperTest : public testing::Test { |
| 46 | }; |
| 47 | }; |
| 48 | |
| 49 | std::ostream& operator<<(std::ostream& os, const url_parse::Component& part) { |
| 50 | return os << "(begin=" << part.begin << ", len=" << part.len << ")"; |
| 51 | } |
| 52 | |
| 53 | struct segment_case { |
| 54 | const std::wstring input; |
| 55 | const std::wstring result; |
| 56 | const url_parse::Component scheme; |
| 57 | const url_parse::Component username; |
| 58 | const url_parse::Component password; |
| 59 | const url_parse::Component host; |
| 60 | const url_parse::Component port; |
| 61 | const url_parse::Component path; |
| 62 | const url_parse::Component query; |
| 63 | const url_parse::Component ref; |
| 64 | }; |
| 65 | |
| 66 | static const segment_case segment_cases[] = { |
| 67 | { L"http://www.google.com/", L"http", |
| 68 | url_parse::Component(0, 4), // scheme |
| 69 | url_parse::Component(), // username |
| 70 | url_parse::Component(), // password |
| 71 | url_parse::Component(7, 14), // host |
| 72 | url_parse::Component(), // port |
| 73 | url_parse::Component(21, 1), // path |
| 74 | url_parse::Component(), // query |
| 75 | url_parse::Component(), // ref |
| 76 | }, |
| 77 | { L"aBoUt:vErSiOn", L"about", |
| 78 | url_parse::Component(0, 5), // scheme |
| 79 | url_parse::Component(), // username |
| 80 | url_parse::Component(), // password |
| 81 | url_parse::Component(), // host |
| 82 | url_parse::Component(), // port |
| 83 | url_parse::Component(), // path |
| 84 | url_parse::Component(), // query |
| 85 | url_parse::Component(), // ref |
| 86 | }, |
| 87 | { L" www.google.com:124?foo#", L"http", |
| 88 | url_parse::Component(), // scheme |
| 89 | url_parse::Component(), // username |
| 90 | url_parse::Component(), // password |
| 91 | url_parse::Component(4, 14), // host |
| 92 | url_parse::Component(19, 3), // port |
| 93 | url_parse::Component(), // path |
| 94 | url_parse::Component(23, 3), // query |
| 95 | url_parse::Component(27, 0), // ref |
| 96 | }, |
| 97 | { L"[email protected]", L"http", |
| 98 | url_parse::Component(), // scheme |
| 99 | url_parse::Component(0, 4), // username |
| 100 | url_parse::Component(), // password |
| 101 | url_parse::Component(5, 14), // host |
| 102 | url_parse::Component(), // port |
| 103 | url_parse::Component(), // path |
| 104 | url_parse::Component(), // query |
| 105 | url_parse::Component(), // ref |
| 106 | }, |
| 107 | { L"ftp:/user:P:[email protected]...::23///pub?foo#bar", L"ftp", |
| 108 | url_parse::Component(0, 3), // scheme |
| 109 | url_parse::Component(5, 4), // username |
| 110 | url_parse::Component(10, 7), // password |
| 111 | url_parse::Component(18, 20), // host |
| 112 | url_parse::Component(39, 2), // port |
| 113 | url_parse::Component(41, 6), // path |
| 114 | url_parse::Component(48, 3), // query |
| 115 | url_parse::Component(52, 3), // ref |
| 116 | }, |
| 117 | }; |
| 118 | |
| 119 | TEST(URLFixerUpperTest, SegmentURL) { |
| 120 | std::wstring result; |
| 121 | url_parse::Parsed parts; |
| 122 | |
| 123 | for (int i = 0; i < arraysize(segment_cases); ++i) { |
| 124 | segment_case value = segment_cases[i]; |
| 125 | result = URLFixerUpper::SegmentURL(value.input, &parts); |
| 126 | EXPECT_EQ(value.result, result); |
| 127 | EXPECT_EQ(value.scheme, parts.scheme); |
| 128 | EXPECT_EQ(value.username, parts.username); |
| 129 | EXPECT_EQ(value.password, parts.password); |
| 130 | EXPECT_EQ(value.host, parts.host); |
| 131 | EXPECT_EQ(value.port, parts.port); |
| 132 | EXPECT_EQ(value.path, parts.path); |
| 133 | EXPECT_EQ(value.query, parts.query); |
| 134 | EXPECT_EQ(value.ref, parts.ref); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // Creates a file and returns its full name as well as the decomposed |
| 139 | // version. Example: |
| 140 | // full_path = "c:\foo\bar.txt" |
| 141 | // dir = "c:\foo" |
| 142 | // file_name = "bar.txt" |
| 143 | static bool MakeTempFile(const std::wstring& dir, |
| 144 | const std::wstring& file_name, |
| 145 | std::wstring* full_path) { |
| 146 | *full_path = dir + L"\\" + file_name; |
| 147 | |
| 148 | HANDLE hfile = CreateFile(full_path->c_str(), GENERIC_READ | GENERIC_WRITE, |
| 149 | 0, NULL, CREATE_ALWAYS, 0, NULL); |
| 150 | if (hfile == NULL || hfile == INVALID_HANDLE_VALUE) |
| 151 | return false; |
| 152 | CloseHandle(hfile); |
| 153 | return true; |
| 154 | } |
| 155 | |
| 156 | // Returns true if the given URL is a file: URL that matches the given file |
| 157 | static bool IsMatchingFileURL(const std::wstring& url, |
| 158 | const std::wstring& full_file_path) { |
| 159 | if (url.length() <= 8) |
| 160 | return false; |
| 161 | if (std::wstring(L"file:///") != url.substr(0, 8)) |
| 162 | return false; // no file:/// prefix |
| 163 | if (url.find('\\') != std::wstring::npos) |
| 164 | return false; // contains backslashes |
| 165 | |
| 166 | std::wstring derived_path; |
| 167 | net_util::FileURLToFilePath(GURL(url), &derived_path); |
| 168 | return (derived_path.length() == full_file_path.length()) && |
| 169 | std::equal(derived_path.begin(), derived_path.end(), |
| 170 | full_file_path.begin(), CaseInsensitiveCompare<wchar_t>()); |
| 171 | } |
| 172 | |
// One fixup expectation: |input| is the raw text handed to
// URLFixerUpper::FixupURL() together with |desired_tld|, and |output| is the
// string expected back. The |fixup_cases| table is also fed to
// URLFixerUpper::FixupRelativeFile() by the FixupRelativeFile test, which
// expects the same outputs.
struct fixup_case {
  const std::wstring input;
  const std::wstring desired_tld;
  const std::wstring output;
} fixup_cases[] = {
  {L"www.google.com", L"", L"http://www.google.com/"},
  {L" www.google.com ", L"", L"http://www.google.com/"},
  {L" foo.com/asdf bar", L"", L"http://foo.com/asdf bar"},
  {L"..www.google.com..", L"", L"http://www.google.com./"},
  {L"http://......", L"", L"http://....../"},
  {L"http://host.com:ninety-two/", L"", L"http://host.com/"},
  {L"http://host.com:ninety-two?foo", L"", L"http://host.com/?foo"},
  {L"google.com:123", L"", L"http://google.com:123/"},
  {L"about:", L"", L"about:"},
  {L"about:version", L"", L"about:version"},
  {L"www:123", L"", L"http://www:123/"},
  {L" www:123", L"", L"http://www:123/"},
  {L"www.google.com?foo", L"", L"http://www.google.com/?foo"},
  {L"www.google.com#foo", L"", L"http://www.google.com/#foo"},
  {L"www.google.com?", L"", L"http://www.google.com/?"},
  {L"www.google.com#", L"", L"http://www.google.com/#"},
  {L"www.google.com:123?foo#bar", L"", L"http://www.google.com:123/?foo#bar"},
  // NOTE(review): exact user/host text to be confirmed against upstream; the
  // expectation is only that the input comes back wrapped in "http://" + "/".
  {L"user@www.google.com", L"", L"http://user@www.google.com/"},
  {L"\x6C34.com", L"", L"http://\x6C34.com/" },
  // It would be better if this next case got treated as http, but I don't see
  // a clean way to guess this isn't the new-and-exciting "user" scheme.
  // NOTE(review): exact password/host text to be confirmed against upstream;
  // the expectation is only that the input is returned unchanged.
  {L"user:passwd@www.google.com:8080/", L"",
   L"user:passwd@www.google.com:8080/"},
  //{L"file:///c:/foo/bar%20baz.txt", L"", L"file:///C:/foo/bar%20baz.txt"},
};
| 202 | |
| 203 | TEST(URLFixerUpperTest, FixupURL) { |
| 204 | std::wstring output; |
| 205 | |
| 206 | for (int i = 0; i < arraysize(fixup_cases); ++i) { |
| 207 | fixup_case value = fixup_cases[i]; |
| 208 | output = URLFixerUpper::FixupURL(value.input, value.desired_tld); |
| 209 | EXPECT_EQ(value.output, output); |
| 210 | } |
| 211 | |
| 212 | // Check the TLD-appending functionality |
| 213 | fixup_case tld_cases[] = { |
| 214 | {L"google", L"com", L"http://www.google.com/"}, |
| 215 | {L"google.", L"com", L"http://www.google.com/"}, |
| 216 | {L"google..", L"com", L"http://www.google.com/"}, |
| 217 | {L".google", L"com", L"http://www.google.com/"}, |
| 218 | {L"www.google", L"com", L"http://www.google.com/"}, |
| 219 | {L"google.com", L"com", L"http://google.com/"}, |
| 220 | {L"http://google", L"com", L"http://www.google.com/"}, |
| 221 | {L"..google..", L"com", L"http://www.google.com/"}, |
| 222 | {L"http://www.google", L"com", L"http://www.google.com/"}, |
| 223 | {L"google/foo", L"com", L"http://www.google.com/foo"}, |
| 224 | {L"google.com/foo", L"com", L"http://google.com/foo"}, |
| 225 | {L"google/?foo=.com", L"com", L"http://www.google.com/?foo=.com"}, |
| 226 | {L"www.google/?foo=www.", L"com", L"http://www.google.com/?foo=www."}, |
| 227 | {L"google.com/?foo=.com", L"com", L"http://google.com/?foo=.com"}, |
| 228 | {L"http://www.google.com", L"com", L"http://www.google.com/"}, |
| 229 | {L"google:123", L"com", L"http://www.google.com:123/"}, |
| 230 | {L"http://google:123", L"com", L"http://www.google.com:123/"}, |
| 231 | }; |
| 232 | for (int i = 0; i < arraysize(tld_cases); ++i) { |
| 233 | fixup_case value = tld_cases[i]; |
| 234 | output = URLFixerUpper::FixupURL(value.input, value.desired_tld); |
| 235 | EXPECT_EQ(value.output, output); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | // Test different types of file inputs to URIFixerUpper::FixupURL. This |
| 240 | // doesn't go into the nice array of fixups above since the file input |
| 241 | // has to exist. |
| 242 | TEST(URLFixerUpperTest, FixupFile) { |
| 243 | // this "original" filename is the one we tweak to get all the variations |
| 244 | std::wstring dir; |
| 245 | std::wstring original; |
| 246 | ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir)); |
| 247 | ASSERT_TRUE(MakeTempFile(dir, L"url fixer upper existing file.txt", |
| 248 | &original)); |
| 249 | |
| 250 | // reference path |
| 251 | std::wstring golden = |
| 252 | UTF8ToWide(net_util::FilePathToFileURL(original).spec()); |
| 253 | |
| 254 | // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic) |
| 255 | std::wstring fixedup = URLFixerUpper::FixupURL(original, L""); |
| 256 | EXPECT_EQ(golden, fixedup); |
| 257 | |
| 258 | // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon) |
| 259 | std::wstring cur(original); |
| 260 | EXPECT_EQ(':', cur[1]); |
| 261 | cur[1] = '|'; |
| 262 | fixedup = URLFixerUpper::FixupURL(cur, L""); |
| 263 | EXPECT_EQ(golden, fixedup); |
| 264 | |
| 265 | fixup_case file_cases[] = { |
| 266 | // File URLs go through GURL, which tries to escape intelligently. |
| 267 | {L"c:\\This%20is a non-existent file.txt", L"", L"file:///C:/This%2520is%20a%20non-existent%20file.txt"}, |
| 268 | |
| 269 | // \\foo\bar.txt -> file://foo/bar.txt |
| 270 | // UNC paths, this file won't exist, but since there are no escapes, it |
| 271 | // should be returned just converted to a file: URL. |
| 272 | {L"\\\\SomeNonexistentHost\\foo\\bar.txt", L"", L"file://somenonexistenthost/foo/bar.txt"}, |
| 273 | {L"//SomeNonexistentHost\\foo/bar.txt", L"", L"file://somenonexistenthost/foo/bar.txt"}, |
| 274 | {L"file:///C:/foo/bar", L"", L"file:///C:/foo/bar"}, |
| 275 | |
| 276 | // These are fixups we don't do, but could consider: |
| 277 | // |
| 278 | // {L"file://C:/foo/bar", L"", L"file:///C:/foo/bar"}, |
| 279 | // {L"file:c:", L"", L"file:///c:/"}, |
| 280 | // {L"file:c:WINDOWS", L"", L"file:///c:/WINDOWS"}, |
| 281 | // {L"file:c|Program Files", L"", L"file:///c:/Program Files"}, |
| 282 | // {L"file:///foo:/bar", L"", L"file://foo/bar"}, |
| 283 | // {L"file:/file", L"", L"file://file/"}, |
| 284 | // {L"file:////////c:\\foo", L"", L"file:///c:/foo"}, |
| 285 | // {L"file://server/folder/file", L"", L"file://server/folder/file"}, |
| 286 | // {L"file:/\\/server\\folder/file", L"", L"file://server/folder/file"}, |
| 287 | }; |
| 288 | for (int i = 0; i < arraysize(file_cases); i++) { |
| 289 | fixedup = URLFixerUpper::FixupURL(file_cases[i].input, |
| 290 | file_cases[i].desired_tld); |
| 291 | EXPECT_EQ(file_cases[i].output, fixedup); |
| 292 | } |
| 293 | |
| 294 | EXPECT_TRUE(DeleteFile(original.c_str())); |
| 295 | } |
| 296 | |
| 297 | TEST(URLFixerUpperTest, FixupRelativeFile) { |
| 298 | std::wstring full_path, dir; |
| 299 | std::wstring file_part(L"url_fixer_upper_existing_file.txt"); |
| 300 | ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir)); |
| 301 | ASSERT_TRUE(MakeTempFile(dir, file_part, &full_path)); |
| 302 | |
| 303 | // make sure we pass through good URLs |
| 304 | std::wstring fixedup; |
| 305 | for (int i = 0; i < arraysize(fixup_cases); ++i) { |
| 306 | fixup_case value = fixup_cases[i]; |
| 307 | fixedup = URLFixerUpper::FixupRelativeFile(dir, value.input); |
| 308 | EXPECT_EQ(value.output, fixedup); |
| 309 | } |
| 310 | |
| 311 | // make sure the existing file got fixed-up to a file URL, and that there |
| 312 | // are no backslashes |
| 313 | fixedup = URLFixerUpper::FixupRelativeFile(dir, file_part); |
| 314 | EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); |
| 315 | EXPECT_TRUE(DeleteFile(full_path.c_str())); |
| 316 | |
| 317 | // create a filename we know doesn't exist and make sure it doesn't get |
| 318 | // fixed up to a file URL |
| 319 | std::wstring nonexistent_file(L"url_fixer_upper_nonexistent_file.txt"); |
| 320 | fixedup = URLFixerUpper::FixupRelativeFile(dir, nonexistent_file); |
| 321 | EXPECT_NE(std::wstring(L"file:///"), fixedup.substr(0, 8)); |
| 322 | EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file)); |
| 323 | |
| 324 | // make a subdir to make sure relative paths with directories work, also |
| 325 | // test spaces: "app_dir\url fixer-upper dir\url fixer-upper existing file.txt" |
| 326 | std::wstring sub_dir(L"url fixer-upper dir"); |
| 327 | std::wstring sub_file(L"url fixer-upper existing file.txt"); |
| 328 | std::wstring new_dir = dir + L"\\" + sub_dir; |
| 329 | CreateDirectory(new_dir.c_str(), NULL); |
| 330 | ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path)); |
| 331 | |
| 332 | // test file in the subdir |
| 333 | std::wstring relative_file = sub_dir + L"\\" + sub_file; |
| 334 | fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); |
| 335 | EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); |
| 336 | |
| 337 | // test file in the subdir with different slashes and escaping |
| 338 | relative_file = sub_dir + L"/" + sub_file; |
| 339 | ReplaceSubstringsAfterOffset(&relative_file, 0, L" ", L"%20"); |
| 340 | fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); |
| 341 | EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); |
| 342 | |
| 343 | // test relative directories and duplicate slashes |
| 344 | // (should resolve to the same file as above) |
| 345 | relative_file = sub_dir + L"\\../" + sub_dir + L"\\\\\\.\\" + sub_file; |
| 346 | fixedup = URLFixerUpper::FixupRelativeFile(dir, relative_file); |
| 347 | EXPECT_PRED2(IsMatchingFileURL, fixedup, full_path); |
| 348 | |
| 349 | // done with the subdir |
| 350 | EXPECT_TRUE(DeleteFile(full_path.c_str())); |
| 351 | EXPECT_TRUE(RemoveDirectory(new_dir.c_str())); |
| 352 | } |