Avi Drissman | 6459548 | 2022-09-14 20:52:29 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
eroman | f6c3a1e6 | 2016-02-09 23:13:45 | [diff] [blame] | 5 | // This file contains a set of utility functions related to parsing, |
| 6 | // manipulating, and interacting with URLs and hostnames. These functions are |
| 7 | // intended to be of a text-processing nature, and should not attempt to use any |
| 8 | // networking or blocking services. |
| 9 | |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 10 | #ifndef NET_BASE_URL_UTIL_H_ |
| 11 | #define NET_BASE_URL_UTIL_H_ |
| 12 | |
| 13 | #include <string> |
| 14 | |
tfarina | 77021d6 | 2015-10-11 20:19:03 | [diff] [blame] | 15 | #include "base/strings/string_piece.h" |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 16 | #include "net/base/net_export.h" |
Anudeep Palanki | c22db42 | 2022-10-07 19:36:26 | [diff] [blame^] | 17 | #include "third_party/abseil-cpp/absl/types/optional.h" |
tfarina | 018de6e | 2015-05-26 17:41:20 | [diff] [blame] | 18 | #include "url/third_party/mozilla/url_parse.h" |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 19 | |
| 20 | class GURL; |
| 21 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 22 | namespace url { |
| 23 | struct CanonHostInfo; |
Matt Menke | 12b8a5b6 | 2021-12-16 15:15:13 | [diff] [blame] | 24 | class SchemeHostPort; |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 25 | } |
| 26 | |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 27 | namespace net { |
| 28 | |
| 29 | // Returns a new GURL by appending the given query parameter name and the |
| 30 | // value. Unsafe characters in the name and the value are escaped like |
| 31 | // %XX%XX. The original query component is preserved if it's present. |
| 32 | // |
| 33 | // Examples: |
| 34 | // |
| 35 | // AppendQueryParameter(GURL("http://example.com"), "name", "value").spec() |
| 36 | // => "http://example.com?name=value" |
| 37 | // AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec() |
| 38 | // => "http://example.com?x=y&name=value" |
| 39 | NET_EXPORT GURL AppendQueryParameter(const GURL& url, |
| 40 | const std::string& name, |
| 41 | const std::string& value); |
| 42 | |
| 43 | // Returns a new GURL by appending or replacing the given query parameter name |
Anudeep Palanki | c22db42 | 2022-10-07 19:36:26 | [diff] [blame^] | 44 | // and the value. If `name` appears more than once, only the first name-value |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 45 | // pair is replaced. Unsafe characters in the name and the value are escaped |
| 46 | // like %XX%XX. The original query component is preserved if it's present. |
Anudeep Palanki | c22db42 | 2022-10-07 19:36:26 | [diff] [blame^] | 47 | // Using `absl::nullopt` for `value` will remove the `name` parameter. |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 48 | // |
| 49 | // Examples: |
| 50 | // |
| 51 | // AppendOrReplaceQueryParameter( |
| 52 | // GURL("http://example.com"), "name", "new").spec() |
| 53 | // => "http://example.com?name=new" |
| 54 | // AppendOrReplaceQueryParameter( |
| 55 | // GURL("http://example.com?x=y&name=old"), "name", "new").spec() |
| 56 | // => "http://example.com?x=y&name=new" |
Anudeep Palanki | c22db42 | 2022-10-07 19:36:26 | [diff] [blame^] | 57 | // AppendOrReplaceQueryParameter( |
| 58 | // GURL("http://example.com?x=y&name=old"), "name", absl::nullopt).spec() |
| 59 | // => "http://example.com?x=y&" |
| 60 | NET_EXPORT GURL |
| 61 | AppendOrReplaceQueryParameter(const GURL& url, |
| 62 | const std::string& name, |
| 63 | absl::optional<base::StringPiece> value); |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 64 | |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 65 | // Iterates over the key-value pairs in the query portion of |url|. |
Alex Kalugin | 0ecaa65 | 2021-12-15 04:06:53 | [diff] [blame] | 66 | // NOTE: QueryIterator stores a reference to |url| and creates base::StringPiece |
| 67 | // instances which refer to the data inside |url|'s query. Therefore |url| must |
| 68 | // outlive QueryIterator and all base::StringPiece objects returned from the |
| 69 | // GetKey and GetValue methods. |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 70 | class NET_EXPORT QueryIterator { |
| 71 | public: |
| 72 | explicit QueryIterator(const GURL& url); |
// Copying is disabled (copy constructor and copy assignment are deleted).
David Bienvenu | a03ac8c | 2020-11-06 15:55:39 | [diff] [blame] | 73 | QueryIterator(const QueryIterator&) = delete; |
| 74 | QueryIterator& operator=(const QueryIterator&) = delete; |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 75 | ~QueryIterator(); |
| 76 | |
// Key and value of the current pair, as views into |url|'s query (see the
// lifetime NOTE above). Presumably these are the raw, still-escaped text, in
// contrast to GetUnescapedValue() -- TODO confirm in the implementation.
Alex Kalugin | 0ecaa65 | 2021-12-15 04:06:53 | [diff] [blame] | 77 | base::StringPiece GetKey() const; |
| 78 | base::StringPiece GetValue() const; |
// Unescaped form of the current value. Returns a reference rather than a
// copy; presumably it refers to |unescaped_value_|, so treat it as valid only
// until the iterator is advanced or destroyed -- TODO confirm.
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 79 | const std::string& GetUnescapedValue(); |
| 80 | |
// IsAtEnd() reports whether iteration has finished; Advance() moves on to the
// next key-value pair.
| 81 | bool IsAtEnd() const; |
| 82 | void Advance(); |
| 83 | |
| 84 | private: |
// Held by reference, not copied: this is why |url| must outlive the iterator.
| 85 | const GURL& url_; |
[email protected] | ce97ca36 | 2014-04-30 11:35:46 | [diff] [blame] | 86 | url::Component query_; |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 87 | bool at_end_; |
[email protected] | ce97ca36 | 2014-04-30 11:35:46 | [diff] [blame] | 88 | url::Component key_; |
| 89 | url::Component value_; |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 90 | std::string unescaped_value_; |
[email protected] | 1a643611 | 2013-10-09 02:49:58 | [diff] [blame] | 91 | }; |
| 92 | |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 93 | // Looks for |search_key| in the query portion of |url|. Returns true if the |
| 94 | // key is found and sets |*out_value| to the unescaped value for the key. |
| 95 | // Returns false if the key is not found. |
// NOTE(review): this declaration does not say which match wins when
// |search_key| occurs more than once -- confirm against the implementation.
| 96 | NET_EXPORT bool GetValueForKeyInQuery(const GURL& url, |
| 97 | const std::string& search_key, |
| 98 | std::string* out_value); |
| 99 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 100 | // Splits an input of the form <host>[":"<port>] into its constituent parts. |
| 101 | // Saves the result into |*host| and |*port|. If the input did not have |
| 102 | // the optional port, sets |*port| to -1. |
| 103 | // Returns true if the parsing was successful, false otherwise. |
| 104 | // The returned host is NOT canonicalized, and may be invalid. |
| 105 | // |
| 106 | // IPv6 literals must be specified in a bracketed form, for instance: |
| 107 | // [::1]:90 and [::1] |
| 108 | // |
| 109 | // The resultant |*host| in both cases will be "::1" (not bracketed). |
David Benjamin | 6e44604 | 2018-03-12 19:20:07 | [diff] [blame] | 110 | NET_EXPORT bool ParseHostAndPort(base::StringPiece input, |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 111 | std::string* host, |
| 112 | int* port); |
| 113 | |
| 114 | // Returns a host:port string for the given URL. |
| 115 | NET_EXPORT std::string GetHostAndPort(const GURL& url); |
| 116 | |
| 117 | // Returns a host[:port] string for the given URL, where the port is omitted |
| 118 | // if it is the default for the URL's scheme. |
| 119 | NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url); |
tfarina | 77021d6 | 2015-10-11 20:19:03 | [diff] [blame] | 120 | |
Matt Menke | 12b8a5b6 | 2021-12-16 15:15:13 | [diff] [blame] | 121 | // Same as GetHostAndOptionalPort(const GURL&), but takes a SchemeHostPort. |
| 122 | NET_EXPORT std::string GetHostAndOptionalPort( |
| 123 | const url::SchemeHostPort& scheme_host_port); |
| 124 | |
tfarina | 77021d6 | 2015-10-11 20:19:03 | [diff] [blame] | 125 | // Returns |host| with its trailing dot removed, if it has one. |
brettw | b65cd5c | 2016-01-23 00:46:38 | [diff] [blame] | 126 | NET_EXPORT std::string TrimEndingDot(base::StringPiece host); |
tfarina | 77021d6 | 2015-10-11 20:19:03 | [diff] [blame] | 127 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 128 | // Returns the host from |url| or, if that host is empty, the full spec of |url|. |
| 129 | NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url); |
| 130 | |
Lily Chen | da52493 | 2020-02-11 20:19:55 | [diff] [blame] | 131 | // Returns the given domain minus its leftmost label, or the empty string if the |
| 132 | // given domain is just a single label. For normal domain names (not IP |
| 133 | // addresses), this represents the "superdomain" of the given domain. |
| 134 | // Note that this does not take into account anything like the Public Suffix |
| 135 | // List, so the superdomain may end up being a bare eTLD. The returned string is |
| 136 | // not guaranteed to be a valid or canonical hostname, or to make any sense at |
| 137 | // all. |
| 138 | // |
| 139 | // Examples: |
| 140 | // |
| 141 | // GetSuperdomain("assets.example.com") -> "example.com" |
| 142 | // GetSuperdomain("example.net") -> "net" |
| 143 | // GetSuperdomain("littlebox") -> "" |
| 144 | // GetSuperdomain("127.0.0.1") -> "0.0.1" (IP addresses get no special handling) |
| 145 | NET_EXPORT std::string GetSuperdomain(base::StringPiece domain); |
| 146 | |
Lily Chen | f46d8ae8 | 2020-04-23 17:57:32 | [diff] [blame] | 147 | // Returns whether |subdomain| is a subdomain of (or identical to) |
| 148 | // |superdomain|, if both are hostnames (not IP addresses -- for which this |
| 149 | // function is nonsensical). Does not consider the Public Suffix List. |
Lily Chen | 033d702 | 2020-04-27 17:21:20 | [diff] [blame] | 150 | // Returns true if both input strings are empty. |
//
// Examples (these follow directly from the contract stated above):
//
// IsSubdomainOf("assets.example.com", "example.com") -> true
// IsSubdomainOf("example.com", "example.com") -> true
// IsSubdomainOf("example.com", "assets.example.com") -> false
// IsSubdomainOf("", "") -> true
Lily Chen | f46d8ae8 | 2020-04-23 17:57:32 | [diff] [blame] | 151 | NET_EXPORT bool IsSubdomainOf(base::StringPiece subdomain, |
| 152 | base::StringPiece superdomain); |
| 153 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 154 | // Canonicalizes |host| and returns it. Also fills |host_info| with |
| 155 | // IP address information. |host_info| must not be null. |
brettw | b65cd5c | 2016-01-23 00:46:38 | [diff] [blame] | 156 | NET_EXPORT std::string CanonicalizeHost(base::StringPiece host, |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 157 | url::CanonHostInfo* host_info); |
| 158 | |
| 159 | // Returns true if |host| is not an IP address and is compliant with a set of |
| 160 | // rules based on RFC 1738 and tweaked to be compatible with the real world. |
| 161 | // The rules are: |
| 162 | // * One or more components separated by '.' |
| 163 | // * Each component contains only alphanumeric characters and '-' or '_' |
| 164 | // * The last component begins with an alphanumeric character |
| 165 | // * Optional trailing dot after last component (means "treat as FQDN") |
| 166 | // |
| 167 | // NOTE: You should only pass in hosts that have been returned from |
| 168 | // CanonicalizeHost(), or you may not get accurate results. |
| 169 | NET_EXPORT bool IsCanonicalizedHostCompliant(const std::string& host); |
| 170 | |
tfarina | 3ad1745 | 2016-01-27 10:34:38 | [diff] [blame] | 171 | // Returns true if |hostname| contains a non-registerable or non-assignable |
| 172 | // domain name (e.g. a gTLD that has not been assigned by IANA) or an IP address |
Nathan Parker | 4a78e3d | 2018-04-11 01:16:20 | [diff] [blame] | 173 | // that falls in a range reserved for non-publicly routable networks. |
tfarina | 3ad1745 | 2016-01-27 10:34:38 | [diff] [blame] | 174 | NET_EXPORT bool IsHostnameNonUnique(const std::string& hostname); |
| 175 | |
Rob Wu | f79b3ba | 2018-01-14 01:54:31 | [diff] [blame] | 176 | // Returns true if the host part of |url| is a local hostname according to |
| 177 | // HostStringIsLocalhost(). |
| 178 | NET_EXPORT bool IsLocalhost(const GURL& url); |
| 179 | |
tfarina | 7ba5a62 | 2016-02-23 23:21:44 | [diff] [blame] | 180 | // Returns true if |host| is one of the local hostnames |
| 181 | // (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1). |
Rob Wu | f79b3ba | 2018-01-14 01:54:31 | [diff] [blame] | 182 | // "[::1]" is not detected as a local hostname. Do not use this function to |
| 183 | // check whether a URL's host is a local hostname; use IsLocalhost() instead. |
tfarina | 7ba5a62 | 2016-02-23 23:21:44 | [diff] [blame] | 184 | // |
| 185 | // Note that this function does not check for IP addresses other than |
| 186 | // the above, although other IP addresses may point to the local |
| 187 | // machine. |
Rob Wu | f79b3ba | 2018-01-14 01:54:31 | [diff] [blame] | 188 | NET_EXPORT bool HostStringIsLocalhost(base::StringPiece host); |
tfarina | 7ba5a62 | 2016-02-23 23:21:44 | [diff] [blame] | 189 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 190 | // Returns |url| minus the portions that aren't core to the network request: |
| 191 | // - user name / password |
| 192 | // - reference section |
| 193 | NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url); |
| 194 | |
Adam Rice | 9bd428b0a | 2019-02-15 06:31:36 | [diff] [blame] | 195 | // Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin |
| 196 | // checks and authentication, where WebSocket URLs are treated as if they were |
| 197 | // HTTP. It is an error to call this function with a |url| whose scheme is not |
| 198 | // "ws" or "wss". |
| 199 | NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url); |
| 200 | |
Lily Chen | e4070ef | 2020-12-22 16:14:38 | [diff] [blame] | 201 | // Returns whether the given URL scheme is of a standard scheme type that can |
| 202 | // have hostnames representing domains (i.e. network hosts). |
| 203 | // See url::SchemeType. |
| 204 | NET_EXPORT bool IsStandardSchemeWithNetworkHost(base::StringPiece scheme); |
| 205 | |
tfarina | c38cb95 | 2016-01-14 12:45:01 | [diff] [blame] | 206 | // Extracts the unescaped username/password from |url|, saving the results |
| 207 | // into |*username| and |*password| as UTF-16 strings. |
| 208 | NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url, |
Jan Wilken Dörrie | 739ccc21 | 2021-03-11 18:13:05 | [diff] [blame] | 209 | std::u16string* username, |
| 210 | std::u16string* password); |
tfarina | c38cb95 | 2016-01-14 12:45:01 | [diff] [blame] | 211 | |
tfarina | 7a4a7fd | 2016-01-20 14:23:44 | [diff] [blame] | 212 | // Returns true if |url|'s host is a Google server. This should only be used |
| 213 | // for histograms and shouldn't be used to affect behavior. |
| 214 | NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url); |
| 215 | |
David Benjamin | f89ca93 | 2019-04-24 23:55:04 | [diff] [blame] | 216 | // Returns true if |host| is the hostname of a Google server. This should only |
| 217 | // be used for histograms and shouldn't be used to affect behavior. |
| 218 | NET_EXPORT_PRIVATE bool IsGoogleHost(base::StringPiece host); |
| 219 | |
tfarina | 9ed7f8c5 | 2016-02-19 17:50:18 | [diff] [blame] | 220 | // Returns true if |host| is of any local hostname form. |
Frédéric Wang | 71698e6 | 2020-12-10 06:13:52 | [diff] [blame] | 221 | // |host| is normalized before being tested. |
| 222 | NET_EXPORT_PRIVATE bool IsLocalHostname(base::StringPiece host); |
tfarina | 9ed7f8c5 | 2016-02-19 17:50:18 | [diff] [blame] | 223 | |
[email protected] | ca93c2aa | 2013-01-31 17:41:01 | [diff] [blame] | 224 | } // namespace net |
| 225 | |
| 226 | #endif // NET_BASE_URL_UTIL_H_ |