blob: bc1f761a5434d5b956efe020476ecf21b30aa377 [file] [log] [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
// This file contains a set of utility functions related to parsing,
// manipulating, and interacting with URLs and hostnames. These functions are
// intended to be of a text-processing nature, and should not attempt to use any
// networking or blocking services.
9
[email protected]ca93c2aa2013-01-31 17:41:0110#ifndef NET_BASE_URL_UTIL_H_
11#define NET_BASE_URL_UTIL_H_
12
13#include <string>
14
tfarina77021d62015-10-11 20:19:0315#include "base/strings/string_piece.h"
[email protected]ca93c2aa2013-01-31 17:41:0116#include "net/base/net_export.h"
Anudeep Palankic22db422022-10-07 19:36:2617#include "third_party/abseil-cpp/absl/types/optional.h"
tfarina018de6e2015-05-26 17:41:2018#include "url/third_party/mozilla/url_parse.h"
[email protected]ca93c2aa2013-01-31 17:41:0119
// Forward declaration; this header only uses GURL by reference or as a return
// type in declarations.
class GURL;
21
// Forward declarations of url:: types that this header only refers to by
// pointer or reference.
namespace url {
struct CanonHostInfo;
class SchemeHostPort;
}  // namespace url
26
[email protected]ca93c2aa2013-01-31 17:41:0127namespace net {
28
29// Returns a new GURL by appending the given query parameter name and the
30// value. Unsafe characters in the name and the value are escaped like
31// %XX%XX. The original query component is preserved if it's present.
32//
33// Examples:
34//
35// AppendQueryParameter(GURL("http://example.com"), "name", "value").spec()
36// => "http://example.com?name=value"
37// AppendQueryParameter(GURL("http://example.com?x=y"), "name", "value").spec()
38// => "http://example.com?x=y&name=value"
39NET_EXPORT GURL AppendQueryParameter(const GURL& url,
40 const std::string& name,
41 const std::string& value);
42
43// Returns a new GURL by appending or replacing the given query parameter name
Anudeep Palankic22db422022-10-07 19:36:2644// and the value. If `name` appears more than once, only the first name-value
[email protected]ca93c2aa2013-01-31 17:41:0145// pair is replaced. Unsafe characters in the name and the value are escaped
46// like %XX%XX. The original query component is preserved if it's present.
Anudeep Palankic22db422022-10-07 19:36:2647// Using `absl::nullopt` for `value` will remove the `name` parameter.
[email protected]ca93c2aa2013-01-31 17:41:0148//
49// Examples:
50//
51// AppendOrReplaceQueryParameter(
52// GURL("http://example.com"), "name", "new").spec()
53// => "http://example.com?name=value"
54// AppendOrReplaceQueryParameter(
55// GURL("http://example.com?x=y&name=old"), "name", "new").spec()
56// => "http://example.com?x=y&name=new"
Anudeep Palankic22db422022-10-07 19:36:2657// AppendOrReplaceQueryParameter(
58// GURL("http://example.com?x=y&name=old"), "name", absl::nullopt).spec()
59// => "http://example.com?x=y&"
60NET_EXPORT GURL
61AppendOrReplaceQueryParameter(const GURL& url,
62 const std::string& name,
63 absl::optional<base::StringPiece> value);
[email protected]ca93c2aa2013-01-31 17:41:0164
[email protected]1a6436112013-10-09 02:49:5865// Iterates over the key-value pairs in the query portion of |url|.
Alex Kalugin0ecaa652021-12-15 04:06:5366// NOTE: QueryIterator stores reference to |url| and creates base::StringPiece
67// instances which refer to the data inside |url| query. Therefore |url| must
68// outlive QueryIterator and all base::StringPiece objects returned from GetKey
69// and GetValue methods.
[email protected]1a6436112013-10-09 02:49:5870class NET_EXPORT QueryIterator {
71 public:
72 explicit QueryIterator(const GURL& url);
David Bienvenua03ac8c2020-11-06 15:55:3973 QueryIterator(const QueryIterator&) = delete;
74 QueryIterator& operator=(const QueryIterator&) = delete;
[email protected]1a6436112013-10-09 02:49:5875 ~QueryIterator();
76
Alex Kalugin0ecaa652021-12-15 04:06:5377 base::StringPiece GetKey() const;
78 base::StringPiece GetValue() const;
[email protected]1a6436112013-10-09 02:49:5879 const std::string& GetUnescapedValue();
80
81 bool IsAtEnd() const;
82 void Advance();
83
84 private:
85 const GURL& url_;
[email protected]ce97ca362014-04-30 11:35:4686 url::Component query_;
[email protected]1a6436112013-10-09 02:49:5887 bool at_end_;
[email protected]ce97ca362014-04-30 11:35:4688 url::Component key_;
89 url::Component value_;
[email protected]1a6436112013-10-09 02:49:5890 std::string unescaped_value_;
[email protected]1a6436112013-10-09 02:49:5891};
92
[email protected]ca93c2aa2013-01-31 17:41:0193// Looks for |search_key| in the query portion of |url|. Returns true if the
94// key is found and sets |out_value| to the unescaped value for the key.
95// Returns false if the key is not found.
96NET_EXPORT bool GetValueForKeyInQuery(const GURL& url,
97 const std::string& search_key,
98 std::string* out_value);
99
tfarina7a4a7fd2016-01-20 14:23:44100// Splits an input of the form <host>[":"<port>] into its consitituent parts.
101// Saves the result into |*host| and |*port|. If the input did not have
102// the optional port, sets |*port| to -1.
103// Returns true if the parsing was successful, false otherwise.
104// The returned host is NOT canonicalized, and may be invalid.
105//
106// IPv6 literals must be specified in a bracketed form, for instance:
107// [::1]:90 and [::1]
108//
109// The resultant |*host| in both cases will be "::1" (not bracketed).
David Benjamin6e446042018-03-12 19:20:07110NET_EXPORT bool ParseHostAndPort(base::StringPiece input,
tfarina7a4a7fd2016-01-20 14:23:44111 std::string* host,
112 int* port);
113
114// Returns a host:port string for the given URL.
115NET_EXPORT std::string GetHostAndPort(const GURL& url);
116
117// Returns a host[:port] string for the given URL, where the port is omitted
118// if it is the default for the URL's scheme.
119NET_EXPORT std::string GetHostAndOptionalPort(const GURL& url);
tfarina77021d62015-10-11 20:19:03120
Matt Menke12b8a5b62021-12-16 15:15:13121// Just like above, but takes a SchemeHostPort.
122NET_EXPORT std::string GetHostAndOptionalPort(
123 const url::SchemeHostPort& scheme_host_port);
124
tfarina77021d62015-10-11 20:19:03125// Returns the hostname by trimming the ending dot, if one exists.
brettwb65cd5c2016-01-23 00:46:38126NET_EXPORT std::string TrimEndingDot(base::StringPiece host);
tfarina77021d62015-10-11 20:19:03127
tfarina7a4a7fd2016-01-20 14:23:44128// Returns either the host from |url|, or, if the host is empty, the full spec.
129NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
130
Lily Chenda524932020-02-11 20:19:55131// Returns the given domain minus its leftmost label, or the empty string if the
132// given domain is just a single label. For normal domain names (not IP
133// addresses), this represents the "superdomain" of the given domain.
134// Note that this does not take into account anything like the Public Suffix
135// List, so the superdomain may end up being a bare eTLD. The returned string is
136// not guaranteed to be a valid or canonical hostname, or to make any sense at
137// all.
138//
139// Examples:
140//
141// GetSuperdomain("assets.example.com") -> "example.com"
142// GetSuperdomain("example.net") -> "net"
143// GetSuperdomain("littlebox") -> ""
144// GetSuperdomain("127.0.0.1") -> "0.0.1"
145NET_EXPORT std::string GetSuperdomain(base::StringPiece domain);
146
Lily Chenf46d8ae82020-04-23 17:57:32147// Returns whether |subdomain| is a subdomain of (or identical to)
148// |superdomain|, if both are hostnames (not IP addresses -- for which this
149// function is nonsensical). Does not consider the Public Suffix List.
Lily Chen033d7022020-04-27 17:21:20150// Returns true if both input strings are empty.
Lily Chenf46d8ae82020-04-23 17:57:32151NET_EXPORT bool IsSubdomainOf(base::StringPiece subdomain,
152 base::StringPiece superdomain);
153
tfarina7a4a7fd2016-01-20 14:23:44154// Canonicalizes |host| and returns it. Also fills |host_info| with
155// IP address information. |host_info| must not be NULL.
brettwb65cd5c2016-01-23 00:46:38156NET_EXPORT std::string CanonicalizeHost(base::StringPiece host,
tfarina7a4a7fd2016-01-20 14:23:44157 url::CanonHostInfo* host_info);
158
159// Returns true if |host| is not an IP address and is compliant with a set of
160// rules based on RFC 1738 and tweaked to be compatible with the real world.
161// The rules are:
162// * One or more components separated by '.'
163// * Each component contains only alphanumeric characters and '-' or '_'
164// * The last component begins with an alphanumeric character
165// * Optional trailing dot after last component (means "treat as FQDN")
166//
167// NOTE: You should only pass in hosts that have been returned from
168// CanonicalizeHost(), or you may not get accurate results.
169NET_EXPORT bool IsCanonicalizedHostCompliant(const std::string& host);
170
tfarina3ad17452016-01-27 10:34:38171// Returns true if |hostname| contains a non-registerable or non-assignable
172// domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
Nathan Parker4a78e3d2018-04-11 01:16:20173// that falls in an range reserved for non-publicly routable networks.
tfarina3ad17452016-01-27 10:34:38174NET_EXPORT bool IsHostnameNonUnique(const std::string& hostname);
175
Rob Wuf79b3ba2018-01-14 01:54:31176// Returns true if the host part of |url| is a local host name according to
177// HostStringIsLocalhost.
178NET_EXPORT bool IsLocalhost(const GURL& url);
179
tfarina7ba5a622016-02-23 23:21:44180// Returns true if |host| is one of the local hostnames
181// (e.g. "localhost") or IP addresses (IPv4 127.0.0.0/8 or IPv6 ::1).
Rob Wuf79b3ba2018-01-14 01:54:31182// "[::1]" is not detected as a local hostname. Do not use this method to check
183// whether the host part of a URL is a local host name; use IsLocalhost instead.
tfarina7ba5a622016-02-23 23:21:44184//
185// Note that this function does not check for IP addresses other than
186// the above, although other IP addresses may point to the local
187// machine.
Rob Wuf79b3ba2018-01-14 01:54:31188NET_EXPORT bool HostStringIsLocalhost(base::StringPiece host);
tfarina7ba5a622016-02-23 23:21:44189
tfarina7a4a7fd2016-01-20 14:23:44190// Strip the portions of |url| that aren't core to the network request.
191// - user name / password
192// - reference section
193NET_EXPORT GURL SimplifyUrlForRequest(const GURL& url);
194
Adam Rice9bd428b0a2019-02-15 06:31:36195// Changes scheme "ws" to "http" and "wss" to "https". This is useful for origin
196// checks and authentication, where WebSocket URLs are treated as if they were
197// HTTP. It is an error to call this function with a url with a scheme other
198// than "ws" or "wss".
199NET_EXPORT GURL ChangeWebSocketSchemeToHttpScheme(const GURL& url);
200
Lily Chene4070ef2020-12-22 16:14:38201// Returns whether the given url scheme is of a standard scheme type that can
202// have hostnames representing domains (i.e. network hosts).
203// See url::SchemeType.
204NET_EXPORT bool IsStandardSchemeWithNetworkHost(base::StringPiece scheme);
205
tfarinac38cb952016-01-14 12:45:01206// Extracts the unescaped username/password from |url|, saving the results
207// into |*username| and |*password|.
208NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
Jan Wilken Dörrie739ccc212021-03-11 18:13:05209 std::u16string* username,
210 std::u16string* password);
tfarinac38cb952016-01-14 12:45:01211
tfarina7a4a7fd2016-01-20 14:23:44212// Returns true if the url's host is a Google server. This should only be used
213// for histograms and shouldn't be used to affect behavior.
214NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);
215
David Benjaminf89ca932019-04-24 23:55:04216// Returns true if |host| is the hostname of a Google server. This should only
217// be used for histograms and shouldn't be used to affect behavior.
218NET_EXPORT_PRIVATE bool IsGoogleHost(base::StringPiece host);
219
tfarina9ed7f8c52016-02-19 17:50:18220// This function tests |host| to see if it is of any local hostname form.
Frédéric Wang71698e62020-12-10 06:13:52221// |host| is normalized before being tested.
222NET_EXPORT_PRIVATE bool IsLocalHostname(base::StringPiece host);
tfarina9ed7f8c52016-02-19 17:50:18223
[email protected]ca93c2aa2013-01-31 17:41:01224} // namespace net
225
226#endif // NET_BASE_URL_UTIL_H_