[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 1 | // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 4 | |
[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 5 | #ifndef NET_BASE_NET_UTIL_H_ |
| 6 | #define NET_BASE_NET_UTIL_H_ |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 7 | |
[email protected] | e537c35 | 2008-08-20 21:42:17 | [diff] [blame] | 8 | #include "build/build_config.h" |
| 9 | |
[email protected] | e537c35 | 2008-08-20 21:42:17 | [diff] [blame] | 10 | #ifdef OS_WIN |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 11 | #include <windows.h> |
[email protected] | e537c35 | 2008-08-20 21:42:17 | [diff] [blame] | 12 | #endif |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 13 | |
[email protected] | d16659f | 2009-05-18 16:47:32 | [diff] [blame] | 14 | #include <string> |
[email protected] | d95fa18 | 2009-09-09 17:01:16 | [diff] [blame] | 15 | #include <set> |
[email protected] | d16659f | 2009-05-18 16:47:32 | [diff] [blame] | 16 | |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 17 | #include "base/basictypes.h" |
[email protected] | 5420bc1e | 2009-07-09 22:48:16 | [diff] [blame] | 18 | #include "base/string16.h" |
[email protected] | a23de857 | 2009-06-03 02:16:32 | [diff] [blame] | 19 | #include "net/base/escape.h" |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 20 | |
[email protected] | 50d7d728 | 2009-03-02 21:45:18 | [diff] [blame] | 21 | struct addrinfo; |
[email protected] | 498c1a6b2 | 2008-11-24 23:37:04 | [diff] [blame] | 22 | class FilePath; |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 23 | class GURL; |
| 24 | |
[email protected] | 13ef7c0 | 2008-11-20 22:30:13 | [diff] [blame] | 25 | namespace base { |
| 26 | class Time; |
| 27 | } |
| 28 | |
[email protected] | 01dbd93 | 2009-06-23 22:52:42 | [diff] [blame] | 29 | namespace url_canon { |
| 30 | struct CanonHostInfo; |
| 31 | } |
| 32 | |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 33 | namespace url_parse { |
| 34 | struct Parsed; |
| 35 | } |
| 36 | |
[email protected] | 8ac1a75 | 2008-07-31 19:40:37 | [diff] [blame] | 37 | namespace net { |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 38 | |
[email protected] | d95fa18 | 2009-09-09 17:01:16 | [diff] [blame] | 39 | // Holds a list of ports that should be accepted despite bans. |
| 40 | extern std::set<int> explicitly_allowed_ports; |
| 41 | |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 42 | // Given the full path to a file name, creates a file: URL. The returned URL |
| 43 | // may not be valid if the input is malformed. |
[email protected] | ceeb87e | 2008-12-04 20:46:06 | [diff] [blame] | 44 | GURL FilePathToFileURL(const FilePath& path); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 45 | |
| 46 | // Converts a file: URL back to a filename that can be passed to the OS. The |
| 47 | // file URL must be well-formed (GURL::is_valid() must return true); we don't |
| 48 | // handle degenerate cases here. Returns true on success, false if it isn't a |
| 49 | // valid file URL. On failure, *file_path will be empty. |
[email protected] | 498c1a6b2 | 2008-11-24 23:37:04 | [diff] [blame] | 50 | bool FileURLToFilePath(const GURL& url, FilePath* file_path); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 51 | |
[email protected] | 71e4573a | 2009-05-21 22:03:00 | [diff] [blame] | 52 | // Splits an input of the form <host>[":"<port>] into its constituent parts. |
[email protected] | f6fb2de | 2009-02-19 08:11:42 | [diff] [blame] | 53 | // Saves the result into |*host| and |*port|. If the input did not have |
| 54 | // the optional port, sets |*port| to -1. |
| 55 | // Returns true if the parsing was successful, false otherwise. |
[email protected] | d16659f | 2009-05-18 16:47:32 | [diff] [blame] | 56 | // The returned host is NOT canonicalized, and may be invalid. If <host> is |
| 57 | // an IPv6 literal address, the returned host includes the square brackets. |
[email protected] | 71e4573a | 2009-05-21 22:03:00 | [diff] [blame] | 58 | bool ParseHostAndPort(std::string::const_iterator host_and_port_begin, |
| 59 | std::string::const_iterator host_and_port_end, |
| 60 | std::string* host, |
| 61 | int* port); |
| 62 | bool ParseHostAndPort(const std::string& host_and_port, |
| 63 | std::string* host, |
| 64 | int* port); |
| 65 | |
| 66 | // Returns a host:port string for the given URL. |
| 67 | std::string GetHostAndPort(const GURL& url); |
| 68 | |
| 69 | // Returns a host[:port] string for the given URL, where the port is omitted |
| 70 | // if it is the default for the URL's scheme. |
| 71 | std::string GetHostAndOptionalPort(const GURL& url); |
[email protected] | f6fb2de | 2009-02-19 08:11:42 | [diff] [blame] | 72 | |
[email protected] | 50d7d728 | 2009-03-02 21:45:18 | [diff] [blame] | 73 | // Returns the string representation of an address, like "192.168.0.1". |
| 74 | // Returns empty string on failure. |
| 75 | std::string NetAddressToString(const struct addrinfo* net_address); |
| 76 | |
[email protected] | eba29134 | 2009-03-03 21:30:46 | [diff] [blame] | 77 | // Returns the hostname of the current system. Returns empty string on failure. |
[email protected] | 43f0aae | 2009-03-25 18:00:00 | [diff] [blame] | 78 | std::string GetHostName(); |
[email protected] | eba29134 | 2009-03-03 21:30:46 | [diff] [blame] | 79 | |
[email protected] | 99d6935 | 2009-09-16 00:20:29 | [diff] [blame] | 80 | // Extracts the unescaped username/password from |url|, saving the results |
| 81 | // into |*username| and |*password|. |
| 82 | void GetIdentityFromURL(const GURL& url, |
| 83 | std::wstring* username, |
| 84 | std::wstring* password); |
| 85 | |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 86 | // Return the value of the HTTP response header with name 'name'. 'headers' |
| 87 | // should be in the format that URLRequest::GetResponseHeaders() returns. |
| 88 | // Returns the empty string if the header is not found. |
| 89 | std::wstring GetSpecificHeader(const std::wstring& headers, |
| 90 | const std::wstring& name); |
| 91 | std::string GetSpecificHeader(const std::string& headers, |
| 92 | const std::string& name); |
| 93 | |
| 94 | // Return the value of the HTTP response header field's parameter named |
| 95 | // 'param_name'. Returns the empty string if the parameter is not found or is |
| 96 | // improperly formatted. |
| 97 | std::wstring GetHeaderParamValue(const std::wstring& field, |
| 98 | const std::wstring& param_name); |
| 99 | std::string GetHeaderParamValue(const std::string& field, |
| 100 | const std::string& param_name); |
| 101 | |
[email protected] | c9825a4 | 2009-05-01 22:51:50 | [diff] [blame] | 102 | // Return the filename extracted from Content-Disposition header. The following |
| 103 | // formats are tried in order listed below: |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 104 | // |
[email protected] | c9825a4 | 2009-05-01 22:51:50 | [diff] [blame] | 105 | // 1. RFC 2047 |
| 106 | // 2. Raw-8bit-characters : |
| 107 | // a. UTF-8, b. referrer_charset, c. default os codepage. |
| 108 | // 3. %-escaped UTF-8. |
| 109 | // |
| 110 | // In step 2, if referrer_charset is empty (i.e. unknown), 2b is skipped. |
| 111 | // In step 3, the fallback charsets tried in step 2 are not tried. We |
| 112 | // can consider doing that later. |
| 113 | // |
| 114 | // When a param value is ASCII, but is not in format #1 or format #3 above, |
| 115 | // it is returned as it is unless it's pretty close to two supported |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 116 | // formats but not well-formed. In that case, an empty string is returned. |
| 117 | // |
| 118 | // In any case, a caller must check for the empty return value and resort to |
| 119 | // another means to get a filename (e.g. url). |
| 120 | // |
| 121 | // This function does not do any escaping and callers are responsible for |
| 122 | // escaping 'unsafe' characters (e.g. (back)slash, colon) as they see fit. |
| 123 | // |
| 124 | // TODO(jungshik): revisit this issue. At the moment, the only caller is |
| 125 | // net_util::GetSuggestedFilename and it calls ReplaceIllegalCharacters. The |
| 126 | // other caller is a unit test. Need to figure out how to expose this function |
| 127 | // only to net_util_unittest. |
| 128 | // |
[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 129 | std::string GetFileNameFromCD(const std::string& header, |
| 130 | const std::string& referrer_charset); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 131 | |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 132 | // Converts the given host name to unicode characters. This can be called for |
| 133 | // any host name; if the input is not IDN or is invalid in some way, we'll just |
| 134 | // return the ASCII source so it is still usable. |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 135 | // |
| 136 | // The input should be the canonicalized ASCII host name from GURL. This |
| 137 | // function does NOT accept UTF-8! Its length must also be given (this is |
| 138 | // designed to work on the substring of the host out of a URL spec). |
| 139 | // |
| 140 | // |languages| is a comma separated list of ISO 639 language codes. It |
| 141 | // is used to determine whether a hostname is 'comprehensible' to a user |
| 142 | // who understands languages listed. |host| will be converted to a |
| 143 | // human-readable form (Unicode) ONLY when each component of |host| is |
| 144 | // regarded as 'comprehensible'. Script-mixing is not allowed except that |
| 145 | // Latin letters in the ASCII range can be mixed with a limited set of |
| 146 | // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko). |
| 147 | // When |languages| is empty, even that mixing is not allowed. |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 148 | // |
| 149 | // |offset_for_adjustment| is an offset into |host|, which will be adjusted to |
| 150 | // point at the same logical place in the output string. If this isn't possible |
| 151 | // because it points past the end of |host| or into the middle of a punycode |
| 152 | // sequence, it will be set to std::wstring::npos. |offset_for_adjustment| may |
| 153 | // be NULL. |
| 154 | std::wstring IDNToUnicode(const char* host, |
| 155 | size_t host_len, |
| 156 | const std::wstring& languages, |
| 157 | size_t* offset_for_adjustment); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 158 | |
[email protected] | 01dbd93 | 2009-06-23 22:52:42 | [diff] [blame] | 159 | // Canonicalizes |host| and returns it. Also fills |host_info| with |
| 160 | // IP address information. |host_info| must not be NULL. |
| 161 | std::string CanonicalizeHost(const std::string& host, |
| 162 | url_canon::CanonHostInfo* host_info); |
| 163 | std::string CanonicalizeHost(const std::wstring& host, |
| 164 | url_canon::CanonHostInfo* host_info); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 165 | |
[email protected] | 833fa26 | 2009-10-27 21:06:11 | [diff] [blame] | 166 | // Returns true if |host| is not an IP address and is compliant with a set of |
| 167 | // rules based on RFC 1738 and tweaked to be compatible with the real world. |
| 168 | // The rules are: |
[email protected] | 96be43e9 | 2009-10-16 19:49:22 | [diff] [blame] | 169 | // * One or more components separated by '.' |
| 170 | // * Each component begins and ends with an alphanumeric character |
[email protected] | 833fa26 | 2009-10-27 21:06:11 | [diff] [blame] | 171 | // * Each component contains only alphanumeric characters and '-' or '_' |
[email protected] | 96be43e9 | 2009-10-16 19:49:22 | [diff] [blame] | 172 | // * The last component does not begin with a digit |
[email protected] | d120c372 | 2009-11-03 18:17:26 | [diff] [blame] | 173 | // * Optional trailing dot after last component (means "treat as FQDN") |
[email protected] | 96be43e9 | 2009-10-16 19:49:22 | [diff] [blame] | 174 | // |
| 175 | // NOTE: You should only pass in hosts that have been returned from |
| 176 | // CanonicalizeHost(), or you may not get accurate results. |
[email protected] | 833fa26 | 2009-10-27 21:06:11 | [diff] [blame] | 177 | bool IsCanonicalizedHostCompliant(const std::string& host); |
[email protected] | 96be43e9 | 2009-10-16 19:49:22 | [diff] [blame] | 178 | |
[email protected] | 5420bc1e | 2009-07-09 22:48:16 | [diff] [blame] | 179 | // Call these functions to get the html snippet for a directory listing. |
| 180 | // The return values of both functions are in UTF-8. |
| 181 | std::string GetDirectoryListingHeader(const string16& title); |
| 182 | |
| 183 | // Given the name of a file in a directory (ftp or local) and |
| 184 | // other information (is_dir, size, modification time), it returns |
| 185 | // the html snippet to add the entry for the file to the directory listing. |
| 186 | // Currently, it's a script tag containing a call to a Javascript function |
| 187 | // |addRow|. |
| 188 | // |
| 189 | // Its 1st parameter is derived from |name| and is the Javascript-string |
| 190 | // escaped form of |name| (i.e. \uXXXX). The 2nd parameter is the url-escaped |
| 191 | // |raw_bytes| if it's not empty. If empty, the 2nd parameter is the |
| 192 | // url-escaped |name| in UTF-8. |
| 193 | std::string GetDirectoryListingEntry(const string16& name, |
| 194 | const std::string& raw_bytes, |
| 195 | bool is_dir, int64 size, |
| 196 | base::Time modified); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 197 | |
| 198 | // If text starts with "www." it is removed, otherwise text is returned |
| 199 | // unmodified. |
| 200 | std::wstring StripWWW(const std::wstring& text); |
| 201 | |
| 202 | // Gets the filename from the raw Content-Disposition header (as read from the |
| 203 | // network). Otherwise uses the last path component name or hostname from |
[email protected] | 630947c | 2009-11-04 18:37:31 | [diff] [blame] | 204 | // |url|. If there is no filename or it can't be used, the given |default_name| |
| 205 | // will be used unless it is empty. |
[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 206 | |
| 207 | // Note: it's possible for the suggested filename to be empty (e.g., |
[email protected] | 13c34d1 | 2009-09-14 20:51:04 | [diff] [blame] | 208 | // file:///). referrer_charset is used as one of charsets |
[email protected] | c9825a4 | 2009-05-01 22:51:50 | [diff] [blame] | 209 | // to interpret a raw 8bit string in C-D header (after interpreting |
| 210 | // as UTF-8 fails). See the comment for GetFileNameFromCD for more details. |
[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 211 | FilePath GetSuggestedFilename(const GURL& url, |
| 212 | const std::string& content_disposition, |
| 213 | const std::string& referrer_charset, |
[email protected] | 630947c | 2009-11-04 18:37:31 | [diff] [blame] | 214 | const FilePath& default_name); |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 215 | |
| 216 | // Checks the given port against a list of ports which are restricted by |
| 217 | // default. Returns true if the port is allowed, false if it is restricted. |
| 218 | bool IsPortAllowedByDefault(int port); |
| 219 | |
| 220 | // Checks the given port against a list of ports which are restricted by the |
| 221 | // FTP protocol. Returns true if the port is allowed, false if it is |
| 222 | // restricted. |
| 223 | bool IsPortAllowedByFtp(int port); |
| 224 | |
[email protected] | d95fa18 | 2009-09-09 17:01:16 | [diff] [blame] | 225 | // Check if banned |port| has been overridden by an entry in |
| 226 | // |explicitly_allowed_ports|. |
| 227 | bool IsPortAllowedByOverride(int port); |
| 228 | |
[email protected] | f6f1ba3cf | 2008-11-11 01:06:15 | [diff] [blame] | 229 | // Set socket to non-blocking mode |
| 230 | int SetNonBlocking(int fd); |
| 231 | |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 232 | // Appends the given part of the original URL to the output string formatted for |
| 233 | // the user. The given parsed structure will be updated. The host name formatter |
| 234 | // also takes the same accept languages component as ElideURL. |new_parsed| may |
| 235 | // be null. |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 236 | void AppendFormattedHost(const GURL& url, |
| 237 | const std::wstring& languages, |
| 238 | std::wstring* output, |
| 239 | url_parse::Parsed* new_parsed, |
| 240 | size_t* offset_for_adjustment); |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 241 | |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 242 | // Creates a string representation of |url|. The IDN host name may be in Unicode |
| 243 | // if |languages| accepts the Unicode representation. If |
| 244 | // |omit_username_password| is true, any username and password are removed. |
| 245 | // |unescape_rules| defines how to clean the URL for human readability. |
[email protected] | a23de857 | 2009-06-03 02:16:32 | [diff] [blame] | 246 | // You will generally want |UnescapeRule::SPACES| for display to the user if you |
| 247 | // can handle spaces, or |UnescapeRule::NORMAL| if not. If the path part and the |
| 248 | // query part seem to be encoded in %-encoded UTF-8, decodes %-encoding and |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 249 | // UTF-8. |
| 250 | // |
| 251 | // The last three parameters may be NULL. |
| 252 | // |new_parsed| will be set to the parsing parameters of the resultant URL. |
[email protected] | a23de857 | 2009-06-03 02:16:32 | [diff] [blame] | 253 | // |prefix_end| will be the length before the hostname of the resultant URL. |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 254 | // |offset_for_adjustment| is an offset into the original |url|'s spec(), which |
| 255 | // will be modified to reflect changes this function makes to the output string; |
| 256 | // for example, if |url| is "http://a:b@c.com/", |omit_username_password| is |
| 257 | // true, and |offset_for_adjustment| is 12 (the offset of '.'), then on return |
| 258 | // the output string will be "http://c.com/" and |offset_for_adjustment| will be |
| 259 | // 8. If the offset cannot be successfully adjusted (e.g. because it points |
| 260 | // into the middle of a component that was entirely removed, past the end of the |
| 261 | // string, or into the middle of an encoding sequence), it will be set to |
| 262 | // std::wstring::npos. |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 263 | std::wstring FormatUrl(const GURL& url, |
| 264 | const std::wstring& languages, |
| 265 | bool omit_username_password, |
[email protected] | a23de857 | 2009-06-03 02:16:32 | [diff] [blame] | 266 | UnescapeRule::Type unescape_rules, |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 267 | url_parse::Parsed* new_parsed, |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 268 | size_t* prefix_end, |
| 269 | size_t* offset_for_adjustment); |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 270 | |
| 271 | // Creates a string representation of |url| for display to the user. This is |
| 272 | // shorthand for the above function with omit_username_password=true, |
[email protected] | a23de857 | 2009-06-03 02:16:32 | [diff] [blame] | 273 | // unescape_rules=SPACES, and the last three parameters all set to NULL. |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 274 | inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { |
[email protected] | ce85f60 | 2009-11-07 01:34:53 | [diff] [blame^] | 275 | return FormatUrl(url, languages, true, UnescapeRule::SPACES, NULL, NULL, |
| 276 | NULL); |
[email protected] | f9fe863 | 2009-05-22 18:15:24 | [diff] [blame] | 277 | } |
| 278 | |
[email protected] | 7ec7c18 | 2009-08-04 02:33:58 | [diff] [blame] | 279 | // Strip the portions of |url| that aren't core to the network request. |
| 280 | // - user name / password |
| 281 | // - reference section |
| 282 | GURL SimplifyUrlForRequest(const GURL& url); |
| 283 | |
[email protected] | d95fa18 | 2009-09-09 17:01:16 | [diff] [blame] | 284 | void SetExplicitlyAllowedPorts(const std::wstring& allowed_ports); |
| 285 | |
[email protected] | 8ac1a75 | 2008-07-31 19:40:37 | [diff] [blame] | 286 | } // namespace net |
initial.commit | 586acc5fe | 2008-07-26 22:42:52 | [diff] [blame] | 287 | |
[email protected] | de294335 | 2009-10-22 23:06:12 | [diff] [blame] | 288 | #endif // NET_BASE_NET_UTIL_H_ |