blob: 3038f208abd62bf5dcb17b2baaad97ab02be4289 [file] [log] [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains a set of utility functions related to parsing,
// manipulating, and interacting with URLs and hostnames. These functions are
// intended to be of a text-processing nature, and should not attempt to use any
// networking or blocking services.
9
[email protected]ca93c2aa2013-01-31 17:41:0110#ifndef NET_BASE_URL_UTIL_H_
11#define NET_BASE_URL_UTIL_H_
12
13#include <string>
14
Ali Hijazi55179192022-11-09 16:28:5115#include "base/memory/raw_ref.h"
tfarina77021d62015-10-11 20:19:0316#include "base/strings/string_piece.h"
[email protected]ca93c2aa2013-01-31 17:41:0117#include "net/base/net_export.h"
Anudeep Palankic22db422022-10-07 19:36:2618#include "third_party/abseil-cpp/absl/types/optional.h"
tfarina018de6e2015-05-26 17:41:2019#include "url/third_party/mozilla/url_parse.h"
[email protected]ca93c2aa2013-01-31 17:41:0120
// Forward declarations; only references/pointers to these types are used in
// the declarations in this header.
class GURL;

namespace url {
struct CanonHostInfo;
class SchemeHostPort;
}  // namespace url
tfarina7a4a7fd2016-01-20 14:23:4427
[email protected]ca93c2aa2013-01-31 17:41:0128namespace net {
29
30// Returns a new GURL by appending the given query parameter name and the
31// value. Unsafe characters in the name and the value are escaped like
32// %XX%XX. The original query component is preserved if it's present.
33//
34// Examples:
35//
36// AppendQueryParameter(GURL("http://example.com"), "name", "value").spec()
37// => "http://example.com?name=value"
38// AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec()
39// => "http://example.com?x=y&name=value"
40NET_EXPORT GURL AppendQueryParameter(const GURL& url,
David Benjamin044f8972022-10-24 18:50:0841 base::StringPiece name,
42 base::StringPiece value);
[email protected]ca93c2aa2013-01-31 17:41:0143
44// Returns a new GURL by appending or replacing the given query parameter name
Anudeep Palankic22db422022-10-07 19:36:2645// and the value. If `name` appears more than once, only the first name-value
[email protected]ca93c2aa2013-01-31 17:41:0146// pair is replaced. Unsafe characters in the name and the value are escaped
47// like %XX%XX. The original query component is preserved if it's present.
Anudeep Palankic22db422022-10-07 19:36:2648// Using `absl::nullopt` for `value` will remove the `name` parameter.
[email protected]ca93c2aa2013-01-31 17:41:0149//
50// Examples:
51//
52// AppendOrReplaceQueryParameter(
53// GURL("http://example.com"), "name", "new").spec()
54// => "http://example.com?name=value"
55// AppendOrReplaceQueryParameter(
56// GURL("http://example.com?x=y&name=old"), "name", "new").spec()
57// => "http://example.com?x=y&name=new"
Anudeep Palankic22db422022-10-07 19:36:2658// AppendOrReplaceQueryParameter(
59// GURL("http://example.com?x=y&name=old"), "name", absl::nullopt).spec()
60// => "http://example.com?x=y&"
61NET_EXPORT GURL
62AppendOrReplaceQueryParameter(const GURL& url,
David Benjamin044f8972022-10-24 18:50:0863 base::StringPiece name,
Anudeep Palankic22db422022-10-07 19:36:2664 absl::optional<base::StringPiece> value);
[email protected]ca93c2aa2013-01-31 17:41:0165
Etienne Noel8e790322022-11-08 19:19:1866// Returns a new GURL by appending the provided ref (also named fragment).
67// Unsafe characters are escaped. The original fragment is replaced
68// if it's present.
69//
70// Examples:
71//
72// AppendOrReplaceRef(
73// GURL("http://example.com"), "ref").spec()
74// => "http://example.com#ref"
75// AppendOrReplaceRef(
76// GURL("http://example.com#ref"), "ref2").spec()
77// => "http://example.com#ref2"
78NET_EXPORT GURL AppendOrReplaceRef(const GURL& url,
79 const base::StringPiece& ref);
80
[email protected]1a6436112013-10-09 02:49:5881// Iterates over the key-value pairs in the query portion of |url|.
Alex Kalugin0ecaa652021-12-15 04:06:5382// NOTE: QueryIterator stores reference to |url| and creates base::StringPiece
83// instances which refer to the data inside |url| query. Therefore |url| must
84// outlive QueryIterator and all base::StringPiece objects returned from GetKey
85// and GetValue methods.
[email protected]1a6436112013-10-09 02:49:5886class NET_EXPORT QueryIterator {
87 public:
88 explicit QueryIterator(const GURL& url);
David Bienvenua03ac8c2020-11-06 15:55:3989 QueryIterator(const QueryIterator&) = delete;
90 QueryIterator& operator=(const QueryIterator&) = delete;
[email protected]1a6436112013-10-09 02:49:5891 ~QueryIterator();
92
Alex Kalugin0ecaa652021-12-15 04:06:5393 base::StringPiece GetKey() const;
94 base::StringPiece GetValue() const;
[email protected]1a6436112013-10-09 02:49:5895 const std::string& GetUnescapedValue();
96
97 bool IsAtEnd() const;
98 void Advance();
99
100 private:
Ali Hijazi55179192022-11-09 16:28:51101 const raw_ref<const GURL> url_;
[email protected]ce97ca362014-04-30 11:35:46102 url::Component query_;
[email protected]1a6436112013-10-09 02:49:58103 bool at_end_;
[email protected]ce97ca362014-04-30 11:35:46104 url::Component key_;
105 url::Component value_;
[email protected]1a6436112013-10-09 02:49:58106 std::string unescaped_value_;
[email protected]1a6436112013-10-09 02:49:58107};
108
[email protected]ca93c2aa2013-01-31 17:41:01109// Looks for |search_key| in the query portion of |url|. Returns true if the
110// key is found and sets |out_value| to the unescaped value for the key.
111// Returns false if the key is not found.
112NET_EXPORT bool GetValueForKeyInQuery(const GURL& url,
David Benjamin044f8972022-10-24 18:50:08113 base::StringPiece search_key,
[email protected]ca93c2aa2013-01-31 17:41:01114 std::string* out_value);
115
tfarina7a4a7fd2016-01-20 14:23:44116// Splits an input of the form <host>[":"<port>] into its consitituent parts.
117// Saves the result into |*host| and |*port|. If the input did not have
118// the optional port, sets |*port| to -1.
119// Returns true if the parsing was successful, false otherwise.
120// The returned host is NOT canonicalized, and may be invalid.
121//
122// IPv6 literals must be specified in a bracketed form, for instance:
123// [::1]:90 and [::1]
124//
125// The resultant |*host| in both cases will be "::1" (not bracketed).
David Benjamin6e446042018-03-12 19:20:07126NET_EXPORT bool ParseHostAndPort(base::StringPiece input,
tfarina7a4a7fd2016-01-20 14:23:44127 std::string* host,
128 int* port);
129
130// Returns a host:port string for the given URL.
131NET_EXPORT std::string GetHostAndPort(const GURL& url);
132
133// Returns a host[:port] string for the given URL, where the port is omitted
134// if it is the default for the URL's scheme.
135NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url);
tfarina77021d62015-10-11 20:19:03136
Matt Menke12b8a5b62021-12-16 15:15:13137// Just like above, but takes a SchemeHostPort.
138NET_EXPORT std::string GetHostAndOptionalPort(
139 const url::SchemeHostPort& scheme_host_port);
140
tfarina77021d62015-10-11 20:19:03141// Returns the hostname by trimming the ending dot, if one exists.
brettwb65cd5c2016-01-23 00:46:38142NET_EXPORT std::string TrimEndingDot(base::StringPiece host);
tfarina77021d62015-10-11 20:19:03143
tfarina7a4a7fd2016-01-20 14:23:44144// Returns either the host from |url|, or, if the host is empty, the full spec.
145NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
146
Lily Chenda524932020-02-11 20:19:55147// Returns the given domain minus its leftmost label, or the empty string if the
148// given domain is just a single label. For normal domain names (not IP
149// addresses), this represents the "superdomain" of the given domain.
150// Note that this does not take into account anything like the Public Suffix
151// List, so the superdomain may end up being a bare eTLD. The returned string is
152// not guaranteed to be a valid or canonical hostname, or to make any sense at
153// all.
154//
155// Examples:
156//
157// GetSuperdomain("assets.example.com") -> "example.com"
158// GetSuperdomain("example.net") -> "net"
159// GetSuperdomain("littlebox") -> ""
160// GetSuperdomain("127.0.0.1") -> "0.0.1"
161NET_EXPORT std::string GetSuperdomain(base::StringPiece domain);
162
Lily Chenf46d8ae82020-04-23 17:57:32163// Returns whether |subdomain| is a subdomain of (or identical to)
164// |superdomain|, if both are hostnames (not IP addresses -- for which this
165// function is nonsensical). Does not consider the Public Suffix List.
Lily Chen033d7022020-04-27 17:21:20166// Returns true if both input strings are empty.
Lily Chenf46d8ae82020-04-23 17:57:32167NET_EXPORT bool IsSubdomainOf(base::StringPiece subdomain,
168 base::StringPiece superdomain);
169
tfarina7a4a7fd2016-01-20 14:23:44170// Canonicalizes |host| and returns it. Also fills |host_info| with
171// IP address information. |host_info| must not be NULL.
brettwb65cd5c2016-01-23 00:46:38172NET_EXPORT std::string CanonicalizeHost(base::StringPiece host,
tfarina7a4a7fd2016-01-20 14:23:44173 url::CanonHostInfo* host_info);
174
175// Returns true if |host| is not an IP address and is compliant with a set of
176// rules based on RFC 1738 and tweaked to be compatible with the real world.
177// The rules are:
Eric Orth35126b62022-12-01 22:12:18178// * One or more non-empty labels separated by '.', each no more than 63
179// characters.
tfarina7a4a7fd2016-01-20 14:23:44180// * Each component contains only alphanumeric characters and '-' or '_'
181// * The last component begins with an alphanumeric character
182// * Optional trailing dot after last component (means "treat as FQDN")
Eric Orth35126b62022-12-01 22:12:18183// * Total size (including optional trailing dot, whether or not actually
184// present in `host`) no more than 254 characters.
tfarina7a4a7fd2016-01-20 14:23:44185//
186// NOTE: You should only pass in hosts that have been returned from
187// CanonicalizeHost(), or you may not get accurate results.
David Benjamin044f8972022-10-24 18:50:08188NET_EXPORT bool IsCanonicalizedHostCompliant(base::StringPiece host);
tfarina7a4a7fd2016-01-20 14:23:44189
tfarina3ad17452016-01-27 10:34:38190// Returns true if |hostname| contains a non-registerable or non-assignable
191// domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
Nathan Parker4a78e3d2018-04-11 01:16:20192// that falls in an range reserved for non-publicly routable networks.
David Benjamin044f8972022-10-24 18:50:08193NET_EXPORT bool IsHostnameNonUnique(base::StringPiece hostname);
tfarina3ad17452016-01-27 10:34:38194
Rob Wuf79b3ba2018-01-14 01:54:31195// Returns true if the host part of |url| is a local host name according to
196// HostStringIsLocalhost.
197NET_EXPORT bool IsLocalhost(const GURL& url);
198
tfarina7ba5a622016-02-23 23:21:44199// Returns true if |host| is one of the local hostnames
200// (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1).
Rob Wuf79b3ba2018-01-14 01:54:31201// "[::1]" is not detected as a local hostname. Do not use this method to check
202// whether the host part of a URL is a local host name; use IsLocalhost instead.
tfarina7ba5a622016-02-23 23:21:44203//
204// Note that this function does not check for IP addresses other than
205// the above, although other IP addresses may point to the local
206// machine.
Rob Wuf79b3ba2018-01-14 01:54:31207NET_EXPORT bool HostStringIsLocalhost(base::StringPiece host);
tfarina7ba5a622016-02-23 23:21:44208
tfarina7a4a7fd2016-01-20 14:23:44209// Strip the portions of |url| that aren't core to the network request.
210// - user name / password
211// - reference section
212NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url);
213
Adam Rice9bd428b0a2019-02-15 06:31:36214// Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin
215// checks and authentication, where WebSocket URLs are treated as if they were
216// HTTP. It is an error to call this function with a url with a scheme other
217// than "ws" or "wss".
218NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url);
219
Lily Chene4070ef2020-12-22 16:14:38220// Returns whether the given url scheme is of a standard scheme type that can
221// have hostnames representing domains (i.e. network hosts).
222// See url::SchemeType.
223NET_EXPORT bool IsStandardSchemeWithNetworkHost(base::StringPiece scheme);
224
tfarinac38cb952016-01-14 12:45:01225// Extracts the unescaped username/password from |url|, saving the results
226// into |*username| and |*password|.
227NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
Jan Wilken Dörrie739ccc212021-03-11 18:13:05228 std::u16string* username,
229 std::u16string* password);
tfarinac38cb952016-01-14 12:45:01230
tfarina7a4a7fd2016-01-20 14:23:44231// Returns true if the url's host is a Google server. This should only be used
232// for histograms and shouldn't be used to affect behavior.
233NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);
234
David Benjaminf89ca932019-04-24 23:55:04235// Returns true if |host| is the hostname of a Google server. This should only
236// be used for histograms and shouldn't be used to affect behavior.
237NET_EXPORT_PRIVATE bool IsGoogleHost(base::StringPiece host);
238
Tsuyoshi Horoe74601e42022-10-31 08:25:51239// Returns true if |host| is the hostname of a Google server and HTTPS DNS
240// record of |host| is expected to indicate H3 support. This should only be used
241// for histograms and shouldn't be used to affect behavior.
242NET_EXPORT_PRIVATE bool IsGoogleHostWithAlpnH3(base::StringPiece host);
243
tfarina9ed7f8c52016-02-19 17:50:18244// This function tests |host| to see if it is of any local hostname form.
Frédéric Wang71698e62020-12-10 06:13:52245// |host| is normalized before being tested.
246NET_EXPORT_PRIVATE bool IsLocalHostname(base::StringPiece host);
tfarina9ed7f8c52016-02-19 17:50:18247
Liviu Tinta8a22a8782023-01-13 18:19:04248// The notion of unescaping used in the application/x-www-form-urlencoded
249// parser. https://url.spec.whatwg.org/#concept-urlencoded-parser
250NET_EXPORT_PRIVATE std::string UnescapePercentEncodedUrl(
251 base::StringPiece input);
252
[email protected]ca93c2aa2013-01-31 17:41:01253} // namespace net
254
255#endif // NET_BASE_URL_UTIL_H_