[email protected] | 51bcc5d | 2013-04-24 01:41:37 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 4 | |
avi | c0c6031 | 2015-12-21 21:03:50 | [diff] [blame] | 5 | #include "url/gurl.h" |
| 6 | |
| 7 | #include <stddef.h> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 8 | |
| 9 | #include <algorithm> |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 10 | #include <memory> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 11 | #include <ostream> |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 12 | #include <utility> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 13 | |
Hans Wennborg | 0e22368 | 2020-04-27 21:51:29 | [diff] [blame] | 14 | #include "base/check_op.h" |
Victor Vasiliev | 6a2bb59 | 2019-08-19 23:03:17 | [diff] [blame] | 15 | #include "base/no_destructor.h" |
qyearsley | 7ffaa68 | 2015-08-03 07:03:49 | [diff] [blame] | 16 | #include "base/strings/string_piece.h" |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 17 | #include "base/strings/string_util.h" |
dskiba | 3bc10ee8 | 2017-02-01 01:22:19 | [diff] [blame] | 18 | #include "base/trace_event/memory_usage_estimator.h" |
Alexander Timin | e4fc848 | 2021-02-10 15:27:46 | [diff] [blame] | 19 | #include "third_party/perfetto/include/perfetto/tracing/traced_value.h" |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 20 | #include "url/url_canon_stdstring.h" |
| 21 | #include "url/url_util.h" |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 22 | |
[email protected] | e05d81f | 2013-10-22 21:20:31 | [diff] [blame] | 23 | GURL::GURL() : is_valid_(false) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 24 | } |
| 25 | |
| 26 | GURL::GURL(const GURL& other) |
| 27 | : spec_(other.spec_), |
| 28 | is_valid_(other.is_valid_), |
[email protected] | e05d81f | 2013-10-22 21:20:31 | [diff] [blame] | 29 | parsed_(other.parsed_) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 30 | if (other.inner_url_) |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 31 | inner_url_ = std::make_unique<GURL>(*other.inner_url_); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 32 | // Valid filesystem urls should always have an inner_url_. |
| 33 | DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_); |
| 34 | } |
| 35 | |
brettw | f78cc27 | 2017-03-24 16:36:42 | [diff] [blame] | 36 | GURL::GURL(GURL&& other) noexcept |
sclittle | 376085b3 | 2017-03-14 21:08:41 | [diff] [blame] | 37 | : spec_(std::move(other.spec_)), |
| 38 | is_valid_(other.is_valid_), |
| 39 | parsed_(other.parsed_), |
| 40 | inner_url_(std::move(other.inner_url_)) { |
| 41 | other.is_valid_ = false; |
| 42 | other.parsed_ = url::Parsed(); |
| 43 | } |
| 44 | |
brettw | dfbcc3b | 2016-01-20 01:49:17 | [diff] [blame] | 45 | GURL::GURL(base::StringPiece url_string) { |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 46 | InitCanonical(url_string, true); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 47 | } |
| 48 | |
brettw | dfbcc3b | 2016-01-20 01:49:17 | [diff] [blame] | 49 | GURL::GURL(base::StringPiece16 url_string) { |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 50 | InitCanonical(url_string, true); |
| 51 | } |
| 52 | |
| 53 | GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) { |
Peter Kasting | 4d49cd4b | 2021-05-18 15:39:48 | [diff] [blame] | 54 | InitCanonical(url_string, false); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 55 | } |
| 56 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 57 | GURL::GURL(const char* canonical_spec, |
| 58 | size_t canonical_spec_len, |
| 59 | const url::Parsed& parsed, |
| 60 | bool is_valid) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 61 | : spec_(canonical_spec, canonical_spec_len), |
| 62 | is_valid_(is_valid), |
[email protected] | e05d81f | 2013-10-22 21:20:31 | [diff] [blame] | 63 | parsed_(parsed) { |
[email protected] | 19b61f97 | 2013-07-26 13:30:09 | [diff] [blame] | 64 | InitializeFromCanonicalSpec(); |
| 65 | } |
| 66 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 67 | GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid) |
ki.stfu | cebea5e | 2016-06-04 07:05:36 | [diff] [blame] | 68 | : spec_(std::move(canonical_spec)), is_valid_(is_valid), parsed_(parsed) { |
[email protected] | 19b61f97 | 2013-07-26 13:30:09 | [diff] [blame] | 69 | InitializeFromCanonicalSpec(); |
| 70 | } |
| 71 | |
Peter Kasting | 4d49cd4b | 2021-05-18 15:39:48 | [diff] [blame] | 72 | template <typename T, typename CharT> |
| 73 | void GURL::InitCanonical(T input_spec, bool trim_path_end) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 74 | url::StdStringCanonOutput output(&spec_); |
| 75 | is_valid_ = url::Canonicalize( |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 76 | input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end, |
| 77 | NULL, &output, &parsed_); |
| 78 | |
| 79 | output.Complete(); // Must be done before using string. |
| 80 | if (is_valid_ && SchemeIsFileSystem()) { |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 81 | inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), |
| 82 | *parsed_.inner_parsed(), true); |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 83 | } |
csharrison | 475851da | 2016-12-17 02:19:42 | [diff] [blame] | 84 | // Valid URLs always have non-empty specs. |
| 85 | DCHECK(!is_valid_ || !spec_.empty()); |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 86 | } |
| 87 | |
[email protected] | 19b61f97 | 2013-07-26 13:30:09 | [diff] [blame] | 88 | void GURL::InitializeFromCanonicalSpec() { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 89 | if (is_valid_ && SchemeIsFileSystem()) { |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 90 | inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(), |
| 91 | *parsed_.inner_parsed(), true); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | #ifndef NDEBUG |
| 95 | // For testing purposes, check that the parsed canonical URL is identical to |
| 96 | // what we would have produced. Skip checking for invalid URLs have no meaning |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 97 | // and we can't always canonicalize then reproducibly. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 98 | if (is_valid_) { |
csharrison | 475851da | 2016-12-17 02:19:42 | [diff] [blame] | 99 | DCHECK(!spec_.empty()); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 100 | url::Component scheme; |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 101 | // We can't do this check on the inner_url of a filesystem URL, as |
| 102 | // canonical_spec actually points to the start of the outer URL, so we'd |
| 103 | // end up with infinite recursion in this constructor. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 104 | if (!url::FindAndCompareScheme(spec_.data(), spec_.length(), |
[email protected] | 08dc705 | 2014-06-18 07:57:49 | [diff] [blame] | 105 | url::kFileSystemScheme, &scheme) || |
[email protected] | 19b61f97 | 2013-07-26 13:30:09 | [diff] [blame] | 106 | scheme.begin == parsed_.scheme.begin) { |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 107 | // We need to retain trailing whitespace on path URLs, as the |parsed_| |
| 108 | // spec we originally received may legitimately contain trailing white- |
| 109 | // space on the path or components e.g. if the #ref has been |
| 110 | // removed from a "foo:hello #ref" URL (see http://crbug.com/291747). |
| 111 | GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 112 | |
| 113 | DCHECK(test_url.is_valid_ == is_valid_); |
| 114 | DCHECK(test_url.spec_ == spec_); |
| 115 | |
| 116 | DCHECK(test_url.parsed_.scheme == parsed_.scheme); |
| 117 | DCHECK(test_url.parsed_.username == parsed_.username); |
| 118 | DCHECK(test_url.parsed_.password == parsed_.password); |
| 119 | DCHECK(test_url.parsed_.host == parsed_.host); |
| 120 | DCHECK(test_url.parsed_.port == parsed_.port); |
| 121 | DCHECK(test_url.parsed_.path == parsed_.path); |
| 122 | DCHECK(test_url.parsed_.query == parsed_.query); |
| 123 | DCHECK(test_url.parsed_.ref == parsed_.ref); |
| 124 | } |
| 125 | } |
| 126 | #endif |
| 127 | } |
| 128 | |
Chris Watkins | 3e06be1 | 2017-11-29 01:40:54 | [diff] [blame] | 129 | GURL::~GURL() = default; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 130 | |
sclittle | 376085b3 | 2017-03-14 21:08:41 | [diff] [blame] | 131 | GURL& GURL::operator=(const GURL& other) { |
| 132 | spec_ = other.spec_; |
| 133 | is_valid_ = other.is_valid_; |
| 134 | parsed_ = other.parsed_; |
| 135 | |
| 136 | if (!other.inner_url_) |
| 137 | inner_url_.reset(); |
| 138 | else if (inner_url_) |
| 139 | *inner_url_ = *other.inner_url_; |
| 140 | else |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 141 | inner_url_ = std::make_unique<GURL>(*other.inner_url_); |
sclittle | 376085b3 | 2017-03-14 21:08:41 | [diff] [blame] | 142 | |
| 143 | return *this; |
| 144 | } |
| 145 | |
Jüri Valdmann | f841ac2 | 2018-05-18 22:36:28 | [diff] [blame] | 146 | GURL& GURL::operator=(GURL&& other) noexcept { |
sclittle | 376085b3 | 2017-03-14 21:08:41 | [diff] [blame] | 147 | spec_ = std::move(other.spec_); |
| 148 | is_valid_ = other.is_valid_; |
| 149 | parsed_ = other.parsed_; |
| 150 | inner_url_ = std::move(other.inner_url_); |
| 151 | |
| 152 | other.is_valid_ = false; |
| 153 | other.parsed_ = url::Parsed(); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 154 | return *this; |
| 155 | } |
| 156 | |
| 157 | const std::string& GURL::spec() const { |
| 158 | if (is_valid_ || spec_.empty()) |
| 159 | return spec_; |
| 160 | |
| 161 | DCHECK(false) << "Trying to get the spec of an invalid URL!"; |
Daniel Cheng | 0a9188d9 | 2018-08-04 04:48:23 | [diff] [blame] | 162 | return base::EmptyString(); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 163 | } |
| 164 | |
dcheng | 3a08877 | 2014-12-06 09:58:21 | [diff] [blame] | 165 | bool GURL::operator<(const GURL& other) const { |
| 166 | return spec_ < other.spec_; |
| 167 | } |
| 168 | |
| 169 | bool GURL::operator>(const GURL& other) const { |
| 170 | return spec_ > other.spec_; |
| 171 | } |
| 172 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 173 | // Note: code duplicated below (it's inconvenient to use a template here). |
Victor Costan | e773216 | 2018-08-30 18:29:40 | [diff] [blame] | 174 | GURL GURL::Resolve(base::StringPiece relative) const { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 175 | // Not allowed for invalid URLs. |
| 176 | if (!is_valid_) |
| 177 | return GURL(); |
| 178 | |
| 179 | GURL result; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 180 | url::StdStringCanonOutput output(&result.spec_); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 181 | if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), |
| 182 | parsed_, relative.data(), |
| 183 | static_cast<int>(relative.length()), |
mkwst | 45f25db | 2015-07-21 04:03:50 | [diff] [blame] | 184 | nullptr, &output, &result.parsed_)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 185 | // Error resolving, return an empty URL. |
| 186 | return GURL(); |
| 187 | } |
| 188 | |
| 189 | output.Complete(); |
| 190 | result.is_valid_ = true; |
| 191 | if (result.SchemeIsFileSystem()) { |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 192 | result.inner_url_ = |
| 193 | std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), |
| 194 | *result.parsed_.inner_parsed(), true); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 195 | } |
| 196 | return result; |
| 197 | } |
| 198 | |
| 199 | // Note: code duplicated above (it's inconvenient to use a template here). |
Victor Costan | e773216 | 2018-08-30 18:29:40 | [diff] [blame] | 200 | GURL GURL::Resolve(base::StringPiece16 relative) const { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 201 | // Not allowed for invalid URLs. |
| 202 | if (!is_valid_) |
| 203 | return GURL(); |
| 204 | |
| 205 | GURL result; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 206 | url::StdStringCanonOutput output(&result.spec_); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 207 | if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), |
| 208 | parsed_, relative.data(), |
| 209 | static_cast<int>(relative.length()), |
mkwst | 45f25db | 2015-07-21 04:03:50 | [diff] [blame] | 210 | nullptr, &output, &result.parsed_)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 211 | // Error resolving, return an empty URL. |
| 212 | return GURL(); |
| 213 | } |
| 214 | |
| 215 | output.Complete(); |
| 216 | result.is_valid_ = true; |
| 217 | if (result.SchemeIsFileSystem()) { |
Peter Boström | fb60ea0 | 2021-04-05 21:06:12 | [diff] [blame] | 218 | result.inner_url_ = |
| 219 | std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(), |
| 220 | *result.parsed_.inner_parsed(), true); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 221 | } |
| 222 | return result; |
| 223 | } |
| 224 | |
| 225 | // Note: code duplicated below (it's inconvenient to use a template here). |
| 226 | GURL GURL::ReplaceComponents( |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 227 | const url::Replacements<char>& replacements) const { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 228 | GURL result; |
| 229 | |
| 230 | // Not allowed for invalid URLs. |
| 231 | if (!is_valid_) |
| 232 | return GURL(); |
| 233 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 234 | url::StdStringCanonOutput output(&result.spec_); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 235 | result.is_valid_ = url::ReplaceComponents( |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 236 | spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, |
| 237 | NULL, &output, &result.parsed_); |
| 238 | |
| 239 | output.Complete(); |
Rakina Zata Amni | 9b5d9b2 | 2021-07-12 16:19:37 | [diff] [blame] | 240 | |
| 241 | ProcessFileOrFileSystemURLAfterReplaceComponents(result); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 242 | return result; |
| 243 | } |
| 244 | |
| 245 | // Note: code duplicated above (it's inconvenient to use a template here). |
| 246 | GURL GURL::ReplaceComponents( |
Jan Wilken Dörrie | 5aad5c2 | 2021-03-08 21:44:12 | [diff] [blame] | 247 | const url::Replacements<char16_t>& replacements) const { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 248 | GURL result; |
| 249 | |
| 250 | // Not allowed for invalid URLs. |
| 251 | if (!is_valid_) |
| 252 | return GURL(); |
| 253 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 254 | url::StdStringCanonOutput output(&result.spec_); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 255 | result.is_valid_ = url::ReplaceComponents( |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 256 | spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements, |
| 257 | NULL, &output, &result.parsed_); |
| 258 | |
| 259 | output.Complete(); |
Rakina Zata Amni | 9b5d9b2 | 2021-07-12 16:19:37 | [diff] [blame] | 260 | |
| 261 | ProcessFileOrFileSystemURLAfterReplaceComponents(result); |
| 262 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 263 | return result; |
| 264 | } |
| 265 | |
Rakina Zata Amni | 9b5d9b2 | 2021-07-12 16:19:37 | [diff] [blame] | 266 | void GURL::ProcessFileOrFileSystemURLAfterReplaceComponents(GURL& url) const { |
| 267 | if (!url.is_valid_) |
| 268 | return; |
| 269 | if (url.SchemeIsFileSystem()) { |
| 270 | url.inner_url_ = |
| 271 | std::make_unique<GURL>(url.spec_.data(), url.parsed_.Length(), |
| 272 | *url.parsed_.inner_parsed(), true); |
| 273 | } |
| 274 | #ifdef WIN32 |
| 275 | if (url.SchemeIsFile()) { |
| 276 | // On Win32, some file URLs created through ReplaceComponents used to lose |
| 277 | // its hostname after getting reparsed (e.g. when it's sent through IPC) due |
| 278 | // to special handling of file URLs with Windows-drive paths in the URL |
| 279 | // parser. To make the behavior for URLs modified through ReplaceComponents |
| 280 | // (instead of getting fully reparsed) the same, immediately reparse the |
| 281 | // URL here to trigger the special handling. |
| 282 | // See https://crbug.com/1214098. |
| 283 | url = GURL(url.spec()); |
| 284 | } |
| 285 | #endif |
| 286 | } |
| 287 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 288 | GURL GURL::GetOrigin() const { |
| 289 | // This doesn't make sense for invalid or nonstandard URLs, so return |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 290 | // the empty URL. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 291 | if (!is_valid_ || !IsStandard()) |
| 292 | return GURL(); |
| 293 | |
| 294 | if (SchemeIsFileSystem()) |
| 295 | return inner_url_->GetOrigin(); |
| 296 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 297 | url::Replacements<char> replacements; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 298 | replacements.ClearUsername(); |
| 299 | replacements.ClearPassword(); |
| 300 | replacements.ClearPath(); |
| 301 | replacements.ClearQuery(); |
| 302 | replacements.ClearRef(); |
| 303 | |
| 304 | return ReplaceComponents(replacements); |
| 305 | } |
| 306 | |
[email protected] | 6b775ee | 2014-03-20 20:27:25 | [diff] [blame] | 307 | GURL GURL::GetAsReferrer() const { |
David Van Cleve | 5607fbe5 | 2020-06-24 19:52:16 | [diff] [blame] | 308 | if (!is_valid() || !IsReferrerScheme(spec_.data(), parsed_.scheme)) |
jochen | 4245039 | 2014-11-24 19:47:22 | [diff] [blame] | 309 | return GURL(); |
| 310 | |
| 311 | if (!has_ref() && !has_username() && !has_password()) |
[email protected] | 6b775ee | 2014-03-20 20:27:25 | [diff] [blame] | 312 | return GURL(*this); |
| 313 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 314 | url::Replacements<char> replacements; |
[email protected] | 6b775ee | 2014-03-20 20:27:25 | [diff] [blame] | 315 | replacements.ClearRef(); |
| 316 | replacements.ClearUsername(); |
| 317 | replacements.ClearPassword(); |
| 318 | return ReplaceComponents(replacements); |
| 319 | } |
| 320 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 321 | GURL GURL::GetWithEmptyPath() const { |
| 322 | // This doesn't make sense for invalid or nonstandard URLs, so return |
| 323 | // the empty URL. |
| 324 | if (!is_valid_ || !IsStandard()) |
| 325 | return GURL(); |
| 326 | |
| 327 | // We could optimize this since we know that the URL is canonical, and we are |
| 328 | // appending a canonical path, so avoiding re-parsing. |
| 329 | GURL other(*this); |
| 330 | if (parsed_.path.len == 0) |
| 331 | return other; |
| 332 | |
| 333 | // Clear everything after the path. |
| 334 | other.parsed_.query.reset(); |
| 335 | other.parsed_.ref.reset(); |
| 336 | |
| 337 | // Set the path, since the path is longer than one, we can just set the |
| 338 | // first character and resize. |
| 339 | other.spec_[other.parsed_.path.begin] = '/'; |
| 340 | other.parsed_.path.len = 1; |
| 341 | other.spec_.resize(other.parsed_.path.begin + 1); |
| 342 | return other; |
| 343 | } |
| 344 | |
Giovanni Ortuño Urquidi | 61b24eda | 2017-08-09 08:13:10 | [diff] [blame] | 345 | GURL GURL::GetWithoutFilename() const { |
| 346 | return Resolve("."); |
| 347 | } |
| 348 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 349 | bool GURL::IsStandard() const { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 350 | return url::IsStandard(spec_.data(), parsed_.scheme); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 351 | } |
| 352 | |
clamy | 12bca18b | 2017-02-10 15:33:07 | [diff] [blame] | 353 | bool GURL::IsAboutBlank() const { |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 354 | return IsAboutUrl(url::kAboutBlankPath); |
| 355 | } |
clamy | 12bca18b | 2017-02-10 15:33:07 | [diff] [blame] | 356 | |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 357 | bool GURL::IsAboutSrcdoc() const { |
| 358 | return IsAboutUrl(url::kAboutSrcdocPath); |
clamy | 12bca18b | 2017-02-10 15:33:07 | [diff] [blame] | 359 | } |
| 360 | |
brettw | adc84688 | 2015-09-25 01:16:22 | [diff] [blame] | 361 | bool GURL::SchemeIs(base::StringPiece lower_ascii_scheme) const { |
| 362 | DCHECK(base::IsStringASCII(lower_ascii_scheme)); |
| 363 | DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme); |
| 364 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 365 | if (parsed_.scheme.len <= 0) |
brettw | adc84688 | 2015-09-25 01:16:22 | [diff] [blame] | 366 | return lower_ascii_scheme.empty(); |
| 367 | return scheme_piece() == lower_ascii_scheme; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 368 | } |
| 369 | |
[email protected] | 91f568903 | 2013-08-22 01:43:33 | [diff] [blame] | 370 | bool GURL::SchemeIsHTTPOrHTTPS() const { |
[email protected] | 9d5877e | 2014-06-02 07:34:35 | [diff] [blame] | 371 | return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme); |
[email protected] | 91f568903 | 2013-08-22 01:43:33 | [diff] [blame] | 372 | } |
| 373 | |
[email protected] | 9690b99 | 2013-11-22 07:40:46 | [diff] [blame] | 374 | bool GURL::SchemeIsWSOrWSS() const { |
[email protected] | 9d5877e | 2014-06-02 07:34:35 | [diff] [blame] | 375 | return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme); |
[email protected] | 9690b99 | 2013-11-22 07:40:46 | [diff] [blame] | 376 | } |
| 377 | |
Maks Orlovich | 44525ce | 2019-02-25 14:17:58 | [diff] [blame] | 378 | bool GURL::SchemeIsCryptographic() const { |
| 379 | if (parsed_.scheme.len <= 0) |
| 380 | return false; |
| 381 | return SchemeIsCryptographic(scheme_piece()); |
| 382 | } |
| 383 | |
| 384 | bool GURL::SchemeIsCryptographic(base::StringPiece lower_ascii_scheme) { |
| 385 | DCHECK(base::IsStringASCII(lower_ascii_scheme)); |
| 386 | DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme); |
| 387 | |
| 388 | return lower_ascii_scheme == url::kHttpsScheme || |
| 389 | lower_ascii_scheme == url::kWssScheme; |
| 390 | } |
| 391 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 392 | int GURL::IntPort() const { |
| 393 | if (parsed_.port.is_nonempty()) |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 394 | return url::ParsePort(spec_.data(), parsed_.port); |
| 395 | return url::PORT_UNSPECIFIED; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 396 | } |
| 397 | |
| 398 | int GURL::EffectiveIntPort() const { |
| 399 | int int_port = IntPort(); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 400 | if (int_port == url::PORT_UNSPECIFIED && IsStandard()) |
| 401 | return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin, |
| 402 | parsed_.scheme.len); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 403 | return int_port; |
| 404 | } |
| 405 | |
| 406 | std::string GURL::ExtractFileName() const { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 407 | url::Component file_component; |
| 408 | url::ExtractFileName(spec_.data(), parsed_.path, &file_component); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 409 | return ComponentString(file_component); |
| 410 | } |
| 411 | |
David Van Cleve | 5f37444 | 2019-11-06 15:01:17 | [diff] [blame] | 412 | base::StringPiece GURL::PathForRequestPiece() const { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 413 | DCHECK(parsed_.path.len > 0) |
| 414 | << "Canonical path for requests should be non-empty"; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 415 | if (parsed_.ref.len >= 0) { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 416 | // Clip off the reference when it exists. The reference starts after the |
| 417 | // #-sign, so we have to subtract one to also remove it. |
David Van Cleve | 5f37444 | 2019-11-06 15:01:17 | [diff] [blame] | 418 | return base::StringPiece(&spec_[parsed_.path.begin], |
| 419 | parsed_.ref.begin - parsed_.path.begin - 1); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 420 | } |
| 421 | // Compute the actual path length, rather than depending on the spec's |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 422 | // terminator. If we're an inner_url, our spec continues on into our outer |
| 423 | // URL's path/query/ref. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 424 | int path_len = parsed_.path.len; |
| 425 | if (parsed_.query.is_valid()) |
| 426 | path_len = parsed_.query.end() - parsed_.path.begin; |
| 427 | |
David Van Cleve | 5f37444 | 2019-11-06 15:01:17 | [diff] [blame] | 428 | return base::StringPiece(&spec_[parsed_.path.begin], path_len); |
| 429 | } |
| 430 | |
| 431 | std::string GURL::PathForRequest() const { |
Peter Kasting | 95e78e4 | 2021-04-29 23:37:51 | [diff] [blame] | 432 | return std::string(PathForRequestPiece()); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 433 | } |
| 434 | |
| 435 | std::string GURL::HostNoBrackets() const { |
Peter Kasting | 95e78e4 | 2021-04-29 23:37:51 | [diff] [blame] | 436 | return std::string(HostNoBracketsPiece()); |
ricea | 1c0de2f | 2017-07-03 08:21:43 | [diff] [blame] | 437 | } |
| 438 | |
| 439 | base::StringPiece GURL::HostNoBracketsPiece() const { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 440 | // If host looks like an IPv6 literal, strip the square brackets. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 441 | url::Component h(parsed_.host); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 442 | if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') { |
| 443 | h.begin++; |
| 444 | h.len -= 2; |
| 445 | } |
ricea | 1c0de2f | 2017-07-03 08:21:43 | [diff] [blame] | 446 | return ComponentStringPiece(h); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 447 | } |
| 448 | |
[email protected] | 5f50c5d | 2013-10-24 19:05:17 | [diff] [blame] | 449 | std::string GURL::GetContent() const { |
Stephen McGruer | b52ebdc | 2018-10-31 22:06:04 | [diff] [blame] | 450 | if (!is_valid_) |
| 451 | return std::string(); |
| 452 | std::string content = ComponentString(parsed_.GetContent()); |
| 453 | if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.len >= 0) |
| 454 | content.erase(content.size() - parsed_.ref.len - 1); |
| 455 | return content; |
[email protected] | 5f50c5d | 2013-10-24 19:05:17 | [diff] [blame] | 456 | } |
| 457 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 458 | bool GURL::HostIsIPAddress() const { |
csharrison | 475851da | 2016-12-17 02:19:42 | [diff] [blame] | 459 | return is_valid_ && url::HostIsIPAddress(host_piece()); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 460 | } |
| 461 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 462 | const GURL& GURL::EmptyGURL() { |
Victor Vasiliev | 6a2bb59 | 2019-08-19 23:03:17 | [diff] [blame] | 463 | static base::NoDestructor<GURL> empty_gurl; |
| 464 | return *empty_gurl; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 465 | } |
| 466 | |
Charles Harrison | 81dc2fb | 2017-08-30 23:41:12 | [diff] [blame] | 467 | bool GURL::DomainIs(base::StringPiece canonical_domain) const { |
pkalinnikov | 054f403 | 2016-08-31 10:54:17 | [diff] [blame] | 468 | if (!is_valid_) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 469 | return false; |
| 470 | |
pkalinnikov | 054f403 | 2016-08-31 10:54:17 | [diff] [blame] | 471 | // FileSystem URLs have empty host_piece, so check this first. |
Charles Harrison | 81dc2fb | 2017-08-30 23:41:12 | [diff] [blame] | 472 | if (inner_url_ && SchemeIsFileSystem()) |
| 473 | return inner_url_->DomainIs(canonical_domain); |
| 474 | return url::DomainIs(host_piece(), canonical_domain); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 475 | } |
| 476 | |
arthursonzogni | 9c873d8c | 2017-02-08 17:58:05 | [diff] [blame] | 477 | bool GURL::EqualsIgnoringRef(const GURL& other) const { |
| 478 | int ref_position = parsed_.CountCharactersBefore(url::Parsed::REF, true); |
| 479 | int ref_position_other = |
| 480 | other.parsed_.CountCharactersBefore(url::Parsed::REF, true); |
| 481 | return base::StringPiece(spec_).substr(0, ref_position) == |
| 482 | base::StringPiece(other.spec_).substr(0, ref_position_other); |
| 483 | } |
| 484 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 485 | void GURL::Swap(GURL* other) { |
| 486 | spec_.swap(other->spec_); |
| 487 | std::swap(is_valid_, other->is_valid_); |
| 488 | std::swap(parsed_, other->parsed_); |
[email protected] | e05d81f | 2013-10-22 21:20:31 | [diff] [blame] | 489 | inner_url_.swap(other->inner_url_); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 490 | } |
| 491 | |
dskiba | 3bc10ee8 | 2017-02-01 01:22:19 | [diff] [blame] | 492 | size_t GURL::EstimateMemoryUsage() const { |
| 493 | return base::trace_event::EstimateMemoryUsage(spec_) + |
| 494 | base::trace_event::EstimateMemoryUsage(inner_url_) + |
| 495 | (parsed_.inner_parsed() ? sizeof(url::Parsed) : 0); |
| 496 | } |
| 497 | |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 498 | bool GURL::IsAboutUrl(base::StringPiece allowed_path) const { |
| 499 | if (!SchemeIs(url::kAboutScheme)) |
| 500 | return false; |
| 501 | |
| 502 | if (has_host() || has_username() || has_password() || has_port()) |
| 503 | return false; |
| 504 | |
Lukasz Anforowicz | fb3733f | 2021-01-11 19:29:29 | [diff] [blame] | 505 | return IsAboutPath(path_piece(), allowed_path); |
| 506 | } |
| 507 | |
| 508 | // static |
| 509 | bool GURL::IsAboutPath(base::StringPiece actual_path, |
| 510 | base::StringPiece allowed_path) { |
| 511 | if (!base::StartsWith(actual_path, allowed_path)) |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 512 | return false; |
| 513 | |
Lukasz Anforowicz | fb3733f | 2021-01-11 19:29:29 | [diff] [blame] | 514 | if (actual_path.size() == allowed_path.size()) { |
| 515 | DCHECK_EQ(actual_path, allowed_path); |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 516 | return true; |
| 517 | } |
| 518 | |
Lukasz Anforowicz | fb3733f | 2021-01-11 19:29:29 | [diff] [blame] | 519 | if ((actual_path.size() == allowed_path.size() + 1) && |
| 520 | actual_path.back() == '/') { |
Peter Kasting | 95e78e4 | 2021-04-29 23:37:51 | [diff] [blame] | 521 | DCHECK_EQ(actual_path, std::string(allowed_path) + '/'); |
Lukasz Anforowicz | 0bc073e | 2019-06-14 19:41:52 | [diff] [blame] | 522 | return true; |
| 523 | } |
| 524 | |
| 525 | return false; |
| 526 | } |
| 527 | |
Alexander Timin | 1b6b272 | 2021-04-21 01:34:27 | [diff] [blame] | 528 | void GURL::WriteIntoTrace(perfetto::TracedValue context) const { |
Alexander Timin | e4fc848 | 2021-02-10 15:27:46 | [diff] [blame] | 529 | std::move(context).WriteString(possibly_invalid_spec()); |
| 530 | } |
| 531 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 532 | std::ostream& operator<<(std::ostream& out, const GURL& url) { |
| 533 | return out << url.possibly_invalid_spec(); |
| 534 | } |
csharrison | ebeca8e | 2016-10-18 02:35:36 | [diff] [blame] | 535 | |
| 536 | bool operator==(const GURL& x, const GURL& y) { |
| 537 | return x.possibly_invalid_spec() == y.possibly_invalid_spec(); |
| 538 | } |
| 539 | |
| 540 | bool operator!=(const GURL& x, const GURL& y) { |
| 541 | return !(x == y); |
| 542 | } |
| 543 | |
| 544 | bool operator==(const GURL& x, const base::StringPiece& spec) { |
Charlie Harrison | 84e35ca | 2020-05-12 19:53:08 | [diff] [blame] | 545 | DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec) |
| 546 | << "Comparisons of GURLs and strings must ensure as a precondition that " |
| 547 | "the string is fully canonicalized."; |
csharrison | ebeca8e | 2016-10-18 02:35:36 | [diff] [blame] | 548 | return x.possibly_invalid_spec() == spec; |
| 549 | } |
| 550 | |
Andrew Moylan | 89296468 | 2017-10-10 07:09:28 | [diff] [blame] | 551 | bool operator==(const base::StringPiece& spec, const GURL& x) { |
| 552 | return x == spec; |
| 553 | } |
| 554 | |
csharrison | ebeca8e | 2016-10-18 02:35:36 | [diff] [blame] | 555 | bool operator!=(const GURL& x, const base::StringPiece& spec) { |
| 556 | return !(x == spec); |
| 557 | } |
Andrew Moylan | 89296468 | 2017-10-10 07:09:28 | [diff] [blame] | 558 | |
| 559 | bool operator!=(const base::StringPiece& spec, const GURL& x) { |
| 560 | return !(x == spec); |
| 561 | } |