blob: 44bda41e7a1c633eae97d7ce7a1a92b76b61100e [file] [log] [blame]
[email protected]9d797f32010-04-23 07:17:541// Copyright (c) 2010 The Chromium Authors. All rights reserved.
license.botbf09a502008-08-24 00:55:552// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit586acc5fe2008-07-26 22:42:524
[email protected]de2943352009-10-22 23:06:125#ifndef NET_BASE_NET_UTIL_H_
6#define NET_BASE_NET_UTIL_H_
[email protected]32b76ef2010-07-26 23:08:247#pragma once
initial.commit586acc5fe2008-07-26 22:42:528
[email protected]e537c352008-08-20 21:42:179#include "build/build_config.h"
10
[email protected]e537c352008-08-20 21:42:1711#ifdef OS_WIN
initial.commit586acc5fe2008-07-26 22:42:5212#include <windows.h>
[email protected]e537c352008-08-20 21:42:1713#endif
initial.commit586acc5fe2008-07-26 22:42:5214
[email protected]d16659f2009-05-18 16:47:3215#include <string>
[email protected]d95fa182009-09-09 17:01:1616#include <set>
[email protected]54392832010-06-08 23:25:0417#include <vector>
[email protected]d16659f2009-05-18 16:47:3218
initial.commit586acc5fe2008-07-26 22:42:5219#include "base/basictypes.h"
[email protected]5420bc1e2009-07-09 22:48:1620#include "base/string16.h"
[email protected]a23de8572009-06-03 02:16:3221#include "net/base/escape.h"
initial.commit586acc5fe2008-07-26 22:42:5222
[email protected]50d7d7282009-03-02 21:45:1823struct addrinfo;
[email protected]498c1a6b22008-11-24 23:37:0424class FilePath;
initial.commit586acc5fe2008-07-26 22:42:5225class GURL;
26
[email protected]13ef7c02008-11-20 22:30:1327namespace base {
28class Time;
29}
30
[email protected]01dbd932009-06-23 22:52:4231namespace url_canon {
32struct CanonHostInfo;
33}
34
[email protected]f9fe8632009-05-22 18:15:2435namespace url_parse {
36struct Parsed;
37}
38
[email protected]8ac1a752008-07-31 19:40:3739namespace net {
initial.commit586acc5fe2008-07-26 22:42:5240
[email protected]69c579e2010-04-23 20:01:0041// Used by FormatUrl to specify handling of certain parts of the url.
42typedef uint32 FormatUrlType;
43typedef uint32 FormatUrlTypes;
44
// Nothing is omitted.
46extern const FormatUrlType kFormatUrlOmitNothing;
47
48// If set, any username and password are removed.
49extern const FormatUrlType kFormatUrlOmitUsernamePassword;
50
51// If the scheme is 'http://', it's removed.
52extern const FormatUrlType kFormatUrlOmitHTTP;
53
[email protected]79845ef2010-06-02 02:37:4054// Omits the path if it is just a slash and there is no query or ref. This is
55// meaningful for non-file "standard" URLs.
56extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
57
// Convenience for omitting all unnecessary types.
59extern const FormatUrlType kFormatUrlOmitAll;
60
[email protected]d95fa182009-09-09 17:01:1661// Holds a list of ports that should be accepted despite bans.
62extern std::set<int> explicitly_allowed_ports;
63
initial.commit586acc5fe2008-07-26 22:42:5264// Given the full path to a file name, creates a file: URL. The returned URL
65// may not be valid if the input is malformed.
[email protected]ceeb87e2008-12-04 20:46:0666GURL FilePathToFileURL(const FilePath& path);
initial.commit586acc5fe2008-07-26 22:42:5267
68// Converts a file: URL back to a filename that can be passed to the OS. The
69// file URL must be well-formed (GURL::is_valid() must return true); we don't
70// handle degenerate cases here. Returns true on success, false if it isn't a
71// valid file URL. On failure, *file_path will be empty.
[email protected]498c1a6b22008-11-24 23:37:0472bool FileURLToFilePath(const GURL& url, FilePath* file_path);
initial.commit586acc5fe2008-07-26 22:42:5273
// Splits an input of the form <host>[":"<port>] into its constituent parts.
[email protected]f6fb2de2009-02-19 08:11:4275// Saves the result into |*host| and |*port|. If the input did not have
76// the optional port, sets |*port| to -1.
77// Returns true if the parsing was successful, false otherwise.
[email protected]d16659f2009-05-18 16:47:3278// The returned host is NOT canonicalized, and may be invalid. If <host> is
79// an IPv6 literal address, the returned host includes the square brackets.
[email protected]71e4573a2009-05-21 22:03:0080bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
81 std::string::const_iterator host_and_port_end,
82 std::string* host,
83 int* port);
84bool ParseHostAndPort(const std::string& host_and_port,
85 std::string* host,
86 int* port);
87
88// Returns a host:port string for the given URL.
89std::string GetHostAndPort(const GURL& url);
90
91// Returns a host[:port] string for the given URL, where the port is omitted
92// if it is the default for the URL's scheme.
93std::string GetHostAndOptionalPort(const GURL& url);
[email protected]f6fb2de2009-02-19 08:11:4294
[email protected]50d7d7282009-03-02 21:45:1895// Returns the string representation of an address, like "192.168.0.1".
96// Returns empty string on failure.
97std::string NetAddressToString(const struct addrinfo* net_address);
98
[email protected]d1388f42010-06-16 03:14:4299// Same as NetAddressToString, but additionally includes the port number. For
100// example: "192.168.0.1:99" or "[::1]:80".
101std::string NetAddressToStringWithPort(const struct addrinfo* net_address);
102
[email protected]eba291342009-03-03 21:30:46103// Returns the hostname of the current system. Returns empty string on failure.
[email protected]43f0aae2009-03-25 18:00:00104std::string GetHostName();
[email protected]eba291342009-03-03 21:30:46105
[email protected]99d69352009-09-16 00:20:29106// Extracts the unescaped username/password from |url|, saving the results
107// into |*username| and |*password|.
108void GetIdentityFromURL(const GURL& url,
[email protected]13c8a092010-07-29 06:15:44109 string16* username,
110 string16* password);
[email protected]99d69352009-09-16 00:20:29111
[email protected]9d797f32010-04-23 07:17:54112// Returns either the host from |url|, or, if the host is empty, the full spec.
113std::string GetHostOrSpecFromURL(const GURL& url);
114
initial.commit586acc5fe2008-07-26 22:42:52115// Return the value of the HTTP response header with name 'name'. 'headers'
116// should be in the format that URLRequest::GetResponseHeaders() returns.
117// Returns the empty string if the header is not found.
118std::wstring GetSpecificHeader(const std::wstring& headers,
119 const std::wstring& name);
120std::string GetSpecificHeader(const std::string& headers,
121 const std::string& name);
122
123// Return the value of the HTTP response header field's parameter named
124// 'param_name'. Returns the empty string if the parameter is not found or is
125// improperly formatted.
126std::wstring GetHeaderParamValue(const std::wstring& field,
127 const std::wstring& param_name);
128std::string GetHeaderParamValue(const std::string& field,
129 const std::string& param_name);
130
[email protected]c9825a42009-05-01 22:51:50131// Return the filename extracted from Content-Disposition header. The following
132// formats are tried in order listed below:
initial.commit586acc5fe2008-07-26 22:42:52133//
[email protected]c9825a42009-05-01 22:51:50134// 1. RFC 2047
135// 2. Raw-8bit-characters :
136// a. UTF-8, b. referrer_charset, c. default os codepage.
137// 3. %-escaped UTF-8.
138//
// In step 2, if referrer_charset is empty (i.e. unknown), 2b is skipped.
140// In step 3, the fallback charsets tried in step 2 are not tried. We
141// can consider doing that later.
142//
143// When a param value is ASCII, but is not in format #1 or format #3 above,
144// it is returned as it is unless it's pretty close to two supported
initial.commit586acc5fe2008-07-26 22:42:52145// formats but not well-formed. In that case, an empty string is returned.
146//
147// In any case, a caller must check for the empty return value and resort to
148// another means to get a filename (e.g. url).
149//
150// This function does not do any escaping and callers are responsible for
151// escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit.
152//
// TODO(jungshik): revisit this issue. At the moment, the only production
// caller is net_util::GetSuggestedFilename and it calls
// ReplaceIllegalCharacters. The other caller is a unit test. Need to figure
// out how to expose this function only to net_util_unittest.
157//
[email protected]de2943352009-10-22 23:06:12158std::string GetFileNameFromCD(const std::string& header,
159 const std::string& referrer_charset);
initial.commit586acc5fe2008-07-26 22:42:52160
// Converts the given host name to unicode characters. This can be called for
// any host name; if the input is not IDN or is invalid in some way, we'll just
// return the ASCII source so it is still usable.
initial.commit586acc5fe2008-07-26 22:42:52164//
165// The input should be the canonicalized ASCII host name from GURL. This
166// function does NOT accept UTF-8! Its length must also be given (this is
167// designed to work on the substring of the host out of a URL spec).
168//
169// |languages| is a comma separated list of ISO 639 language codes. It
170// is used to determine whether a hostname is 'comprehensible' to a user
171// who understands languages listed. |host| will be converted to a
172// human-readable form (Unicode) ONLY when each component of |host| is
// regarded as 'comprehensible'. Script-mixing is not allowed except that
174// Latin letters in the ASCII range can be mixed with a limited set of
175// script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
176// When |languages| is empty, even that mixing is not allowed.
[email protected]ce85f602009-11-07 01:34:53177//
178// |offset_for_adjustment| is an offset into |host|, which will be adjusted to
179// point at the same logical place in the output string. If this isn't possible
180// because it points past the end of |host| or into the middle of a punycode
181// sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may
182// be NULL.
183std::wstring IDNToUnicode(const char* host,
184 size_t host_len,
185 const std::wstring& languages,
186 size_t* offset_for_adjustment);
initial.commit586acc5fe2008-07-26 22:42:52187
[email protected]01dbd932009-06-23 22:52:42188// Canonicalizes |host| and returns it. Also fills |host_info| with
189// IP address information. |host_info| must not be NULL.
190std::string CanonicalizeHost(const std::string& host,
191 url_canon::CanonHostInfo* host_info);
192std::string CanonicalizeHost(const std::wstring& host,
193 url_canon::CanonHostInfo* host_info);
initial.commit586acc5fe2008-07-26 22:42:52194
[email protected]833fa262009-10-27 21:06:11195// Returns true if |host| is not an IP address and is compliant with a set of
196// rules based on RFC 1738 and tweaked to be compatible with the real world.
197// The rules are:
[email protected]96be43e92009-10-16 19:49:22198// * One or more components separated by '.'
199// * Each component begins and ends with an alphanumeric character
[email protected]833fa262009-10-27 21:06:11200// * Each component contains only alphanumeric characters and '-' or '_'
[email protected]96be43e92009-10-16 19:49:22201// * The last component does not begin with a digit
[email protected]d120c3722009-11-03 18:17:26202// * Optional trailing dot after last component (means "treat as FQDN")
// If |desired_tld| is non-empty, the host will only be considered invalid if
204// appending it as a trailing component still results in an invalid host. This
205// helps us avoid marking as "invalid" user attempts to open "www.401k.com" by
206// typing 4-0-1-k-<ctrl>+<enter>.
[email protected]96be43e92009-10-16 19:49:22207//
208// NOTE: You should only pass in hosts that have been returned from
209// CanonicalizeHost(), or you may not get accurate results.
[email protected]7e563812010-03-22 20:05:59210bool IsCanonicalizedHostCompliant(const std::string& host,
211 const std::string& desired_tld);
[email protected]96be43e92009-10-16 19:49:22212
[email protected]5420bc1e2009-07-09 22:48:16213// Call these functions to get the html snippet for a directory listing.
214// The return values of both functions are in UTF-8.
215std::string GetDirectoryListingHeader(const string16& title);
216
217// Given the name of a file in a directory (ftp or local) and
218// other information (is_dir, size, modification time), it returns
219// the html snippet to add the entry for the file to the directory listing.
220// Currently, it's a script tag containing a call to a Javascript function
221// |addRow|.
222//
[email protected]193c3512010-05-11 09:19:30223// |name| is the file name to be displayed. |raw_bytes| will be used
224// as the actual target of the link (so for example, ftp links should use
225// server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
226// will be used.
227//
228// Both |name| and |raw_bytes| are escaped internally.
[email protected]5420bc1e2009-07-09 22:48:16229std::string GetDirectoryListingEntry(const string16& name,
230 const std::string& raw_bytes,
231 bool is_dir, int64 size,
232 base::Time modified);
initial.commit586acc5fe2008-07-26 22:42:52233
234// If text starts with "www." it is removed, otherwise text is returned
235// unmodified.
236std::wstring StripWWW(const std::wstring& text);
237
238// Gets the filename from the raw Content-Disposition header (as read from the
239// network). Otherwise uses the last path component name or hostname from
// |url|. If there is no filename or it can't be used, the given |default_name|
// will be used unless it is empty.
[email protected]de2943352009-10-22 23:06:12242
243// Note: it's possible for the suggested filename to be empty (e.g.,
[email protected]13c34d12009-09-14 20:51:04244// file:///). referrer_charset is used as one of charsets
[email protected]c9825a42009-05-01 22:51:50245// to interpret a raw 8bit string in C-D header (after interpreting
// as UTF-8 fails). See the comment for GetFileNameFromCD for more details.
[email protected]de2943352009-10-22 23:06:12247FilePath GetSuggestedFilename(const GURL& url,
248 const std::string& content_disposition,
249 const std::string& referrer_charset,
[email protected]630947c2009-11-04 18:37:31250 const FilePath& default_name);
initial.commit586acc5fe2008-07-26 22:42:52251
252// Checks the given port against a list of ports which are restricted by
253// default. Returns true if the port is allowed, false if it is restricted.
254bool IsPortAllowedByDefault(int port);
255
256// Checks the given port against a list of ports which are restricted by the
257// FTP protocol. Returns true if the port is allowed, false if it is
258// restricted.
259bool IsPortAllowedByFtp(int port);
260
// Check if banned |port| has been overridden by an entry in
// |explicitly_allowed_ports|.
263bool IsPortAllowedByOverride(int port);
264
[email protected]f6f1ba3cf2008-11-11 01:06:15265// Set socket to non-blocking mode
266int SetNonBlocking(int fd);
267
[email protected]f9fe8632009-05-22 18:15:24268// Appends the given part of the original URL to the output string formatted for
269// the user. The given parsed structure will be updated. The host name formatter
270// also takes the same accept languages component as ElideURL. |new_parsed| may
271// be null.
[email protected]ce85f602009-11-07 01:34:53272void AppendFormattedHost(const GURL& url,
273 const std::wstring& languages,
274 std::wstring* output,
275 url_parse::Parsed* new_parsed,
276 size_t* offset_for_adjustment);
[email protected]f9fe8632009-05-22 18:15:24277
[email protected]ce85f602009-11-07 01:34:53278// Creates a string representation of |url|. The IDN host name may be in Unicode
// if |languages| accepts the Unicode representation. |format_types| is a
// bitmask of FormatUrlTypes, see it for details. |unescape_rules| defines how
// to clean the URL for human readability. You will generally want
// |UnescapeRule::SPACES| for display to the user if you can handle spaces, or
// |UnescapeRule::NORMAL| if not. If the path part and the query part seem to
// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8.
[email protected]ce85f602009-11-07 01:34:53285//
286// The last three parameters may be NULL.
287// |new_parsed| will be set to the parsing parameters of the resultant URL.
[email protected]a23de8572009-06-03 02:16:32288// |prefix_end| will be the length before the hostname of the resultant URL.
[email protected]ce85f602009-11-07 01:34:53289// |offset_for_adjustment| is an offset into the original |url|'s spec(), which
290// will be modified to reflect changes this function makes to the output string;
// for example, if |url| is "http://a:b@c.com/", |format_types| includes
// kFormatUrlOmitUsernamePassword, and |offset_for_adjustment| is 12 (the
// offset of '.'), then on return the output string will be "http://c.com/"
// and |offset_for_adjustment| will be 8. If the offset cannot be successfully
// adjusted (e.g. because it points into the middle of a component that was
// entirely removed, past the end of the string, or into the middle of an
// encoding sequence), it will be set to std::wstring::npos.
[email protected]f9fe8632009-05-22 18:15:24298std::wstring FormatUrl(const GURL& url,
299 const std::wstring& languages,
[email protected]69c579e2010-04-23 20:01:00300 FormatUrlTypes format_types,
[email protected]a23de8572009-06-03 02:16:32301 UnescapeRule::Type unescape_rules,
[email protected]f9fe8632009-05-22 18:15:24302 url_parse::Parsed* new_parsed,
[email protected]ce85f602009-11-07 01:34:53303 size_t* prefix_end,
304 size_t* offset_for_adjustment);
[email protected]f9fe8632009-05-22 18:15:24305
[email protected]79845ef2010-06-02 02:37:40306// This is a convenience function for FormatUrl() with
307// format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
308// set of flags for "URLs to display to the user". You should be cautious about
309// using this for URLs which will be parsed or sent to other applications.
[email protected]f9fe8632009-05-22 18:15:24310inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) {
[email protected]79845ef2010-06-02 02:37:40311 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
312 NULL, NULL, NULL);
[email protected]f9fe8632009-05-22 18:15:24313}
314
[email protected]79845ef2010-06-02 02:37:40315// Returns whether FormatUrl() would strip a trailing slash from |url|, given a
316// format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
317bool CanStripTrailingSlash(const GURL& url);
318
[email protected]7ec7c182009-08-04 02:33:58319// Strip the portions of |url| that aren't core to the network request.
320// - user name / password
321// - reference section
322GURL SimplifyUrlForRequest(const GURL& url);
323
[email protected]d95fa182009-09-09 17:01:16324void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports);
325
[email protected]32eaa332010-02-08 22:15:54326// Perform a simplistic test to see if IPv6 is supported by trying to create an
327// IPv6 socket.
328// TODO(jar): Make test more in-depth as needed.
329bool IPv6Supported();
330
[email protected]2f3bc65c2010-07-23 17:47:10331// Returns true if it can determine that only loopback addresses are configured.
332// i.e. if only 127.0.0.1 and ::1 are routable.
333bool HaveOnlyLoopbackAddresses();
334
[email protected]54392832010-06-08 23:25:04335// IPAddressNumber is used to represent an IP address's numeric value as an
336// array of bytes, from most significant to least significant. This is the
337// network byte ordering.
338//
// IPv4 addresses will have length 4, whereas IPv6 addresses will have length 16.
340typedef std::vector<unsigned char> IPAddressNumber;
341
342// Parses an IP address literal (either IPv4 or IPv6) to its numeric value.
343// Returns true on success and fills |ip_number| with the numeric value.
344bool ParseIPLiteralToNumber(const std::string& ip_literal,
345 IPAddressNumber* ip_number);
346
347// Converts an IPv4 address to an IPv4-mapped IPv6 address.
348// For example 192.168.0.1 would be converted to ::ffff:192.168.0.1.
349IPAddressNumber ConvertIPv4NumberToIPv6Number(
350 const IPAddressNumber& ipv4_number);
351
352// Parses an IP block specifier from CIDR notation to an
353// (IP address, prefix length) pair. Returns true on success and fills
354// |*ip_number| with the numeric value of the IP address and sets
355// |*prefix_length_in_bits| with the length of the prefix.
356//
357// CIDR notation literals can use either IPv4 or IPv6 literals. Some examples:
358//
359// 10.10.3.1/20
360// a:b:c::/46
361// ::1/128
362bool ParseCIDRBlock(const std::string& cidr_literal,
363 IPAddressNumber* ip_number,
364 size_t* prefix_length_in_bits);
365
366// Compares an IP address to see if it falls within the specified IP block.
367// Returns true if it does, false otherwise.
368//
369// The IP block is given by (|ip_prefix|, |prefix_length_in_bits|) -- any
370// IP address whose |prefix_length_in_bits| most significant bits match
371// |ip_prefix| will be matched.
372//
373// In cases when an IPv4 address is being compared to an IPv6 address prefix
374// and vice versa, the IPv4 addresses will be converted to IPv4-mapped
375// (IPv6) addresses.
376bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
377 const IPAddressNumber& ip_prefix,
378 size_t prefix_length_in_bits);
379
[email protected]d1388f42010-06-16 03:14:42380// Returns the port field of the sockaddr in |info|.
381uint16* GetPortFieldFromAddrinfo(const struct addrinfo* info);
382
// Returns the value of |info|'s port (in host byte ordering).
384int GetPortFromAddrinfo(const struct addrinfo* info);
385
[email protected]8ac1a752008-07-31 19:40:37386} // namespace net
initial.commit586acc5fe2008-07-26 22:42:52387
[email protected]de2943352009-10-22 23:06:12388#endif // NET_BASE_NET_UTIL_H_