[email protected] | 51bcc5d | 2013-04-24 01:41:37 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 4 | |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 5 | #include "url/url_util.h" |
| 6 | |
avi | c0c6031 | 2015-12-21 21:03:50 | [diff] [blame] | 7 | #include <stddef.h> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 8 | #include <string.h> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 9 | |
[email protected] | 8d892fa8 | 2014-07-02 12:42:04 | [diff] [blame] | 10 | #include "base/debug/leak_annotations.h" |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 11 | #include "base/logging.h" |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 12 | #include "base/strings/string_util.h" |
clamy | eff9252 | 2017-01-23 22:48:56 | [diff] [blame] | 13 | #include "url/gurl.h" |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 14 | #include "url/url_canon_internal.h" |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 15 | #include "url/url_constants.h" |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 16 | #include "url/url_file.h" |
| 17 | #include "url/url_util_internal.h" |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 18 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 19 | namespace url { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 20 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 21 | namespace { |
| 22 | |
// Pass this enum through for methods which would like to know if whitespace
// removal is necessary.
enum WhitespaceRemovalPolicy {
  // DoCanonicalize runs RemoveURLWhitespace on its input before parsing it.
  REMOVE_WHITESPACE,
  // DoCanonicalize parses its input as given. DoResolveRelative passes this
  // for input it has already run through RemoveURLWhitespace itself.
  DO_NOT_REMOVE_WHITESPACE,
};
| 29 | |
// Built-in table of schemes paired with their SchemeType.
// NOTE(review): presumably this seeds |standard_schemes| during
// initialization; the seeding call is not in this part of the file -- confirm.
const SchemeWithType kStandardURLSchemes[] = {
    {kHttpScheme, SCHEME_WITH_PORT},
    {kHttpsScheme, SCHEME_WITH_PORT},
    // Yes, file URLs can have a hostname, so file URLs should be handled as
    // "standard". File URLs never have a port as specified by the SchemeType
    // field.
    {kFileScheme, SCHEME_WITHOUT_PORT},
    {kFtpScheme, SCHEME_WITH_PORT},
    {kGopherScheme, SCHEME_WITH_PORT},
    {kWsScheme, SCHEME_WITH_PORT},   // WebSocket.
    {kWssScheme, SCHEME_WITH_PORT},  // WebSocket secure.
    {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
    {kHttpSuboriginScheme, SCHEME_WITH_PORT},
    {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
| 45 | |
// Built-in table of referrer schemes paired with their SchemeType.
// NOTE(review): presumably this seeds |referrer_schemes| during
// initialization; the seeding call is not in this part of the file -- confirm.
const SchemeWithType kReferrerURLSchemes[] = {
    {kHttpScheme, SCHEME_WITH_PORT},
    {kHttpsScheme, SCHEME_WITH_PORT},
    {kHttpSuboriginScheme, SCHEME_WITH_PORT},
    {kHttpsSuboriginScheme, SCHEME_WITH_PORT},
};
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 52 | |
// Built-in scheme name tables. Each is a plain array of C strings, which is
// the shape InitSchemes() accepts as |initial_schemes|.
// NOTE(review): presumably each table seeds the similarly named
// std::vector<std::string> registry (|secure_schemes|, |local_schemes|,
// |no_access_schemes|, |cors_enabled_schemes|); the seeding calls are not in
// this part of the file -- confirm.

// Built-in "secure" schemes.
const char* kSecureSchemes[] = {
    kHttpsScheme,
    kAboutScheme,
    kDataScheme,
    kWssScheme,
};

// Built-in "local" schemes.
const char* kLocalSchemes[] = {
    kFileScheme,
};

// Built-in "no access" schemes.
const char* kNoAccessSchemes[] = {
    kAboutScheme,
    kJavaScriptScheme,
    kDataScheme,
};

// Built-in "CORS enabled" schemes.
const char* kCORSEnabledSchemes[] = {
    kHttpScheme,
    kHttpsScheme,
    kDataScheme,
};
| 75 | |
// NOTE(review): presumably set once Initialize() has populated the registries
// below; the code that writes this flag is not in this part of the file --
// confirm.
bool initialized = false;

// Lists of the currently installed standard and referrer schemes. These lists
// are lazily initialized by Initialize and are leaked on shutdown to prevent
// any destructors from being called that will slow us down or cause problems.
std::vector<SchemeWithType>* standard_schemes = nullptr;
std::vector<SchemeWithType>* referrer_schemes = nullptr;

// Similar to above, initialized by the Init*Schemes methods.
std::vector<std::string>* secure_schemes = nullptr;
std::vector<std::string>* local_schemes = nullptr;
std::vector<std::string>* no_access_schemes = nullptr;
std::vector<std::string>* cors_enabled_schemes = nullptr;

// See the LockSchemeRegistries declaration in the header. The DoAdd*Scheme
// helpers DCHECK that this is still false before modifying a registry.
bool scheme_registries_locked = false;
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 92 | |
brettw | 8511167 | 2015-07-23 21:56:35 | [diff] [blame] | 93 | // This template converts a given character type to the corresponding |
| 94 | // StringPiece type. |
| 95 | template<typename CHAR> struct CharToStringPiece { |
| 96 | }; |
| 97 | template<> struct CharToStringPiece<char> { |
| 98 | typedef base::StringPiece Piece; |
| 99 | }; |
| 100 | template<> struct CharToStringPiece<base::char16> { |
| 101 | typedef base::StringPiece16 Piece; |
| 102 | }; |
| 103 | |
// Allocates a new scheme list holding a std::string copy of each of the first
// |size| entries of |initial_schemes|, in order, and stores the list in
// |*schemes|. Any previous value of |*schemes| is overwritten, not freed.
void InitSchemes(std::vector<std::string>** schemes,
                 const char** initial_schemes,
                 size_t size) {
  const char** const first = initial_schemes;
  const char** const last = initial_schemes + size;
  *schemes = new std::vector<std::string>(first, last);
}
| 112 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 113 | void InitSchemesWithType(std::vector<SchemeWithType>** schemes, |
| 114 | const SchemeWithType* initial_schemes, |
| 115 | size_t size) { |
| 116 | *schemes = new std::vector<SchemeWithType>(size); |
| 117 | for (size_t i = 0; i < size; i++) { |
| 118 | (*(*schemes))[i] = initial_schemes[i]; |
| 119 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 120 | } |
| 121 | |
| 122 | // Given a string and a range inside the string, compares it to the given |
| 123 | // lower-case |compare_to| buffer. |
| 124 | template<typename CHAR> |
| 125 | inline bool DoCompareSchemeComponent(const CHAR* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 126 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 127 | const char* compare_to) { |
| 128 | if (!component.is_nonempty()) |
| 129 | return compare_to[0] == 0; // When component is empty, match empty scheme. |
brettw | 8511167 | 2015-07-23 21:56:35 | [diff] [blame] | 130 | return base::LowerCaseEqualsASCII( |
| 131 | typename CharToStringPiece<CHAR>::Piece( |
| 132 | &spec[component.begin], component.len), |
| 133 | compare_to); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 134 | } |
| 135 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 136 | // Returns true and sets |type| to the SchemeType of the given scheme |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 137 | // identified by |scheme| within |spec| if in |schemes|. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 138 | template<typename CHAR> |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 139 | bool DoIsInSchemes(const CHAR* spec, |
| 140 | const Component& scheme, |
| 141 | SchemeType* type, |
| 142 | const std::vector<SchemeWithType>& schemes) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 143 | if (!scheme.is_nonempty()) |
| 144 | return false; // Empty or invalid schemes are non-standard. |
| 145 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 146 | for (const SchemeWithType& scheme_with_type : schemes) { |
| 147 | if (base::LowerCaseEqualsASCII(typename CharToStringPiece<CHAR>::Piece( |
| 148 | &spec[scheme.begin], scheme.len), |
| 149 | scheme_with_type.scheme)) { |
| 150 | *type = scheme_with_type.type; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 151 | return true; |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 152 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 153 | } |
| 154 | return false; |
| 155 | } |
| 156 | |
| 157 | template<typename CHAR> |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 158 | bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 159 | Initialize(); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 160 | return DoIsInSchemes(spec, scheme, type, *standard_schemes); |
| 161 | } |
| 162 | |
| 163 | |
| 164 | template<typename CHAR> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 165 | bool DoFindAndCompareScheme(const CHAR* str, |
| 166 | int str_len, |
| 167 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 168 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 169 | // Before extracting scheme, canonicalize the URL to remove any whitespace. |
| 170 | // This matches the canonicalization done in DoCanonicalize function. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 171 | RawCanonOutputT<CHAR> whitespace_buffer; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 172 | int spec_len; |
| 173 | const CHAR* spec = RemoveURLWhitespace(str, str_len, |
| 174 | &whitespace_buffer, &spec_len); |
| 175 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 176 | Component our_scheme; |
| 177 | if (!ExtractScheme(spec, spec_len, &our_scheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 178 | // No scheme. |
| 179 | if (found_scheme) |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 180 | *found_scheme = Component(); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 181 | return false; |
| 182 | } |
| 183 | if (found_scheme) |
| 184 | *found_scheme = our_scheme; |
| 185 | return DoCompareSchemeComponent(spec, our_scheme, compare); |
| 186 | } |
| 187 | |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 188 | template <typename CHAR> |
| 189 | bool DoCanonicalize(const CHAR* spec, |
| 190 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 191 | bool trim_path_end, |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 192 | WhitespaceRemovalPolicy whitespace_policy, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 193 | CharsetConverter* charset_converter, |
| 194 | CanonOutput* output, |
| 195 | Parsed* output_parsed) { |
csharrison | 60e6ff0e | 2017-01-31 23:59:29 | [diff] [blame] | 196 | output->ReserveSizeIfNeeded(spec_len); |
csharrison | 96b890e5 | 2017-01-19 00:13:34 | [diff] [blame] | 197 | |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 198 | // Remove any whitespace from the middle of the relative URL if necessary. |
| 199 | // Possibly this will result in copying to the new buffer. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 200 | RawCanonOutputT<CHAR> whitespace_buffer; |
mkwst | 41318f4 | 2017-01-19 15:11:50 | [diff] [blame] | 201 | if (whitespace_policy == REMOVE_WHITESPACE) { |
| 202 | int original_len = spec_len; |
| 203 | spec = |
| 204 | RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len); |
| 205 | if (spec_len != original_len) |
| 206 | output_parsed->whitespace_removed = true; |
| 207 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 208 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 209 | Parsed parsed_input; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 210 | #ifdef WIN32 |
| 211 | // For Windows, we allow things that look like absolute Windows paths to be |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 212 | // fixed up magically to file URLs. This is done for IE compatibility. For |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 213 | // example, this will change "c:/foo" into a file URL rather than treating |
| 214 | // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
| 215 | // There is similar logic in url_canon_relative.cc for |
| 216 | // |
| 217 | // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which |
| 218 | // has no meaning as an absolute path name. This is because browsers on Mac |
| 219 | // & Unix don't generally do this, so there is no compatibility reason for |
| 220 | // doing so. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 221 | if (DoesBeginUNCPath(spec, 0, spec_len, false) || |
| 222 | DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { |
| 223 | ParseFileURL(spec, spec_len, &parsed_input); |
| 224 | return CanonicalizeFileURL(spec, spec_len, parsed_input, charset_converter, |
| 225 | output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 226 | } |
| 227 | #endif |
| 228 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 229 | Component scheme; |
| 230 | if (!ExtractScheme(spec, spec_len, &scheme)) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 231 | return false; |
| 232 | |
| 233 | // This is the parsed version of the input URL, we have to canonicalize it |
| 234 | // before storing it in our object. |
| 235 | bool success; |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 236 | SchemeType unused_scheme_type = SCHEME_WITH_PORT; |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 237 | if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 238 | // File URLs are special. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 239 | ParseFileURL(spec, spec_len, &parsed_input); |
| 240 | success = CanonicalizeFileURL(spec, spec_len, parsed_input, |
| 241 | charset_converter, output, output_parsed); |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 242 | } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 243 | // Filesystem URLs are special. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 244 | ParseFileSystemURL(spec, spec_len, &parsed_input); |
| 245 | success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input, |
| 246 | charset_converter, output, |
| 247 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 248 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 249 | } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 250 | // All "normal" URLs. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 251 | ParseStandardURL(spec, spec_len, &parsed_input); |
| 252 | success = CanonicalizeStandardURL(spec, spec_len, parsed_input, |
| 253 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 254 | |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 255 | } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 256 | // Mailto URLs are treated like standard URLs, with only a scheme, path, |
| 257 | // and query. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 258 | ParseMailtoURL(spec, spec_len, &parsed_input); |
| 259 | success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output, |
| 260 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 261 | |
| 262 | } else { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 263 | // "Weird" URLs like data: and javascript:. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 264 | ParsePathURL(spec, spec_len, trim_path_end, &parsed_input); |
| 265 | success = CanonicalizePathURL(spec, spec_len, parsed_input, output, |
| 266 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 267 | } |
| 268 | return success; |
| 269 | } |
| 270 | |
// Resolves |in_relative| (of length |in_relative_length|) against the base URL
// given by |base_spec|, |base_spec_len| and |base_parsed|, writing the result
// to |output| and |output_parsed|.
//
// Whitespace is removed from |in_relative| first; if that changes its length,
// |output_parsed->whitespace_removed| is set. |charset_converter| is forwarded
// to the resolving and canonicalizing helpers.
//
// Returns false if IsRelativeURL reports an error. Otherwise returns the
// result of ResolveRelativeURL when the input is relative, or of
// DoCanonicalize when the input is handled as a complete URL.
template<typename CHAR>
bool DoResolveRelative(const char* base_spec,
                       int base_spec_len,
                       const Parsed& base_parsed,
                       const CHAR* in_relative,
                       int in_relative_length,
                       CharsetConverter* charset_converter,
                       CanonOutput* output,
                       Parsed* output_parsed) {
  // Remove any whitespace from the middle of the relative URL, possibly
  // copying to the new buffer.
  RawCanonOutputT<CHAR> whitespace_buffer;
  int relative_length;
  const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
                                             &whitespace_buffer,
                                             &relative_length);
  if (in_relative_length != relative_length)
    output_parsed->whitespace_removed = true;

  // Classify the base by the number of consecutive slashes that follow
  // "scheme:": at least one slash makes it hierarchical, at least two make it
  // authority-based. Both stay false when there is no base spec or no scheme.
  bool base_is_authority_based = false;
  bool base_is_hierarchical = false;
  if (base_spec &&
      base_parsed.scheme.is_nonempty()) {
    int after_scheme = base_parsed.scheme.end() + 1;  // Skip past the colon.
    int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme,
                                              base_spec_len);
    base_is_authority_based = num_slashes > 1;
    base_is_hierarchical = num_slashes > 0;
  }

  // Only the yes/no answer from DoIsStandard is used; the scheme type it
  // reports is discarded.
  SchemeType unused_scheme_type = SCHEME_WITH_PORT;
  bool standard_base_scheme =
      base_parsed.scheme.is_nonempty() &&
      DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type);

  bool is_relative;
  Component relative_component;
  if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length,
                     (base_is_hierarchical || standard_base_scheme),
                     &is_relative, &relative_component)) {
    // Error resolving.
    return false;
  }

  // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
  // ReserveRelativeURL, to enable more accurate buffer sizes.

  // Pretend for a moment that |base_spec| is a standard URL. Normally
  // non-standard URLs are treated as PathURLs, but if the base has an
  // authority we would like to preserve it.
  if (is_relative && base_is_authority_based && !standard_base_scheme) {
    Parsed base_parsed_authority;
    ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);
    if (base_parsed_authority.host.is_nonempty()) {
      RawCanonOutputT<char> temporary_output;
      bool did_resolve_succeed =
          ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
                             relative_component, charset_converter,
                             &temporary_output, output_parsed);
      // The output_parsed is incorrect at this point (because it was built
      // based on base_parsed_authority instead of base_parsed) and needs to be
      // re-created.
      // Note that the return value of this DoCanonicalize call is ignored; the
      // function reports the outcome of ResolveRelativeURL instead.
      DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
                     REMOVE_WHITESPACE, charset_converter, output,
                     output_parsed);
      return did_resolve_succeed;
    }
    // If the re-parsed base has no host, control deliberately leaves this
    // branch without returning and reaches the DoCanonicalize call at the
    // bottom, even though |is_relative| is true.
  } else if (is_relative) {
    // Relative, resolve and canonicalize.
    bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
        DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
    return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,
                              relative_component, charset_converter, output,
                              output_parsed);
  }

  // Not relative, canonicalize the input. Whitespace was already removed
  // above, so DoCanonicalize is told not to do it again.
  return DoCanonicalize(relative, relative_length, true,
                        DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
                        output_parsed);
}
| 352 | |
// Applies |replacements| to the URL given by |spec|, |spec_len| and |parsed|,
// writing the result to |output| and |out_parsed|.
//
// If the scheme is being replaced, the new scheme is substituted textually,
// the whole string is re-canonicalized, and the function recurses once with
// the scheme replacement cleared. Otherwise the work is delegated to the
// Replace*URL helper that matches the existing scheme (file, filesystem,
// standard, mailto, or path URL as the default), and that helper's result is
// returned.
template<typename CHAR>
bool DoReplaceComponents(const char* spec,
                         int spec_len,
                         const Parsed& parsed,
                         const Replacements<CHAR>& replacements,
                         CharsetConverter* charset_converter,
                         CanonOutput* output,
                         Parsed* out_parsed) {
  // If the scheme is overridden, just do a simple string substitution and
  // re-parse the whole thing. There are lots of edge cases that we really don't
  // want to deal with. Like what happens if I replace "http://e:8080/foo"
  // with a file. Does it become "file:///E:/8080/foo" where the port number
  // becomes part of the path? Parsing that string as a file URL says "yes"
  // but almost no sane rule for dealing with the components individually would
  // come up with that.
  //
  // Why allow these crazy cases at all? Programmatically, there is almost no
  // case for replacing the scheme. The most common case for hitting this is
  // in JS when building up a URL using the location object. In this case, the
  // JS code expects the string substitution behavior:
  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
  if (replacements.IsSchemeOverridden()) {
    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
    // the existing spec.
    RawCanonOutput<128> scheme_replaced;
    Component scheme_replaced_parsed;
    CanonicalizeScheme(replacements.sources().scheme,
                       replacements.components().scheme,
                       &scheme_replaced, &scheme_replaced_parsed);

    // We can assume that the input is canonicalized, which means it always has
    // a colon after the scheme (or where the scheme would be).
    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
                                                    : 1;
    // Append whatever follows the colon in the original spec, if anything.
    if (spec_len - spec_after_colon > 0) {
      scheme_replaced.Append(&spec[spec_after_colon],
                             spec_len - spec_after_colon);
    }

    // We now need to completely re-parse the resulting string since its meaning
    // may have changed with the different scheme.
    RawCanonOutput<128> recanonicalized;
    Parsed recanonicalized_parsed;
    // The return value is intentionally ignored; see the warning below.
    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
                   REMOVE_WHITESPACE, charset_converter, &recanonicalized,
                   &recanonicalized_parsed);

    // Recurse using the version with the scheme already replaced. This will now
    // use the replacement rules for the new scheme.
    //
    // Warning: this code assumes that ReplaceComponents will re-check all
    // components for validity. This is because we can't fail if DoCanonicalize
    // failed above since theoretically the thing making it fail could be
    // getting replaced here. If ReplaceComponents didn't re-check everything,
    // we wouldn't know if something *not* getting replaced is a problem.
    // If the scheme-specific replacers are made more intelligent so they don't
    // re-check everything, we should instead re-canonicalize the whole thing
    // after this call to check validity (this assumes replacing the scheme is
    // much much less common than other types of replacements, like clearing the
    // ref).
    Replacements<CHAR> replacements_no_scheme = replacements;
    replacements_no_scheme.SetScheme(NULL, Component());
    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
                               recanonicalized_parsed, replacements_no_scheme,
                               charset_converter, output, out_parsed);
  }

  // TODO(csharrison): We could be smarter about size to reserve if this is done
  // in callers below, and the code checks to see which components are being
  // replaced, and with what length. If this ends up being a hot spot it should
  // be changed.
  output->ReserveSizeIfNeeded(spec_len);

  // If we get here, then we know the scheme doesn't need to be replaced, so can
  // just key off the scheme in the spec to know how to do the replacements.
  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
    return ReplaceFileURL(spec, parsed, replacements, charset_converter, output,
                          out_parsed);
  }
  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) {
    return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
                                output, out_parsed);
  }
  // Only the yes/no answer from DoIsStandard is used; the scheme type it
  // reports is discarded.
  SchemeType unused_scheme_type = SCHEME_WITH_PORT;
  if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) {
    return ReplaceStandardURL(spec, parsed, replacements, charset_converter,
                              output, out_parsed);
  }
  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) {
    return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed);
  }

  // Default is a path URL.
  return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
}
| 448 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 449 | void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) { |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 450 | DCHECK(schemes); |
| 451 | // If this assert triggers, it means you've called Add*Scheme after |
| 452 | // LockSchemeRegistries has been called (see the header file for |
| 453 | // LockSchemeRegistries for more). |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 454 | // |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 455 | // This normally means you're trying to set up a new scheme too late in your |
| 456 | // application's init process. Locate where your app does this initialization |
| 457 | // and calls LockSchemeRegistries, and add your new scheme there. |
| 458 | DCHECK(!scheme_registries_locked) |
| 459 | << "Trying to add a scheme after the lists have been locked."; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 460 | |
| 461 | size_t scheme_len = strlen(new_scheme); |
| 462 | if (scheme_len == 0) |
| 463 | return; |
| 464 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 465 | DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); |
| 466 | schemes->push_back(std::string(new_scheme)); |
| 467 | } |
| 468 | |
| 469 | void DoAddSchemeWithType(const char* new_scheme, |
| 470 | SchemeType type, |
| 471 | std::vector<SchemeWithType>* schemes) { |
| 472 | DCHECK(schemes); |
| 473 | // If this assert triggers, it means you've called Add*Scheme after |
| 474 | // LockSchemeRegistries has been called (see the header file for |
| 475 | // LockSchemeRegistries for more). |
| 476 | // |
| 477 | // This normally means you're trying to set up a new scheme too late in your |
| 478 | // application's init process. Locate where your app does this initialization |
| 479 | // and calls LockSchemeRegistries, and add your new scheme there. |
| 480 | DCHECK(!scheme_registries_locked) |
| 481 | << "Trying to add a scheme after the lists have been locked."; |
| 482 | |
| 483 | size_t scheme_len = strlen(new_scheme); |
| 484 | if (scheme_len == 0) |
| 485 | return; |
| 486 | |
| 487 | DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 488 | // Duplicate the scheme into a new buffer and add it to the list of standard |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 489 | // schemes. This pointer will be leaked on shutdown. |
| 490 | char* dup_scheme = new char[scheme_len + 1]; |
[email protected] | 8d892fa8 | 2014-07-02 12:42:04 | [diff] [blame] | 491 | ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 492 | memcpy(dup_scheme, new_scheme, scheme_len + 1); |
| 493 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 494 | SchemeWithType scheme_with_type; |
| 495 | scheme_with_type.scheme = dup_scheme; |
| 496 | scheme_with_type.type = type; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 497 | schemes->push_back(scheme_with_type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 498 | } |
| 499 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 500 | } // namespace |
| 501 | |
| 502 | void Initialize() { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 503 | if (initialized) |
| 504 | return; |
| 505 | InitSchemesWithType(&standard_schemes, kStandardURLSchemes, |
| 506 | arraysize(kStandardURLSchemes)); |
| 507 | InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes, |
| 508 | arraysize(kReferrerURLSchemes)); |
| 509 | InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes)); |
| 510 | InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes)); |
| 511 | InitSchemes(&no_access_schemes, kNoAccessSchemes, |
| 512 | arraysize(kNoAccessSchemes)); |
| 513 | InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes, |
| 514 | arraysize(kCORSEnabledSchemes)); |
| 515 | initialized = true; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 516 | } |
| 517 | |
| 518 | void Shutdown() { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 519 | initialized = false; |
| 520 | delete standard_schemes; |
| 521 | standard_schemes = nullptr; |
| 522 | delete referrer_schemes; |
| 523 | referrer_schemes = nullptr; |
| 524 | delete secure_schemes; |
| 525 | secure_schemes = nullptr; |
| 526 | delete local_schemes; |
| 527 | local_schemes = nullptr; |
| 528 | delete no_access_schemes; |
| 529 | no_access_schemes = nullptr; |
| 530 | delete cors_enabled_schemes; |
| 531 | cors_enabled_schemes = nullptr; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 532 | } |
| 533 | |
| 534 | void AddStandardScheme(const char* new_scheme, SchemeType type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 535 | Initialize(); |
| 536 | DoAddSchemeWithType(new_scheme, type, standard_schemes); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 537 | } |
| 538 | |
| 539 | void AddReferrerScheme(const char* new_scheme, SchemeType type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 540 | Initialize(); |
| 541 | DoAddSchemeWithType(new_scheme, type, referrer_schemes); |
| 542 | } |
| 543 | |
| 544 | void AddSecureScheme(const char* new_scheme) { |
| 545 | Initialize(); |
| 546 | DoAddScheme(new_scheme, secure_schemes); |
| 547 | } |
| 548 | |
| 549 | const std::vector<std::string>& GetSecureSchemes() { |
| 550 | Initialize(); |
| 551 | return *secure_schemes; |
| 552 | } |
| 553 | |
| 554 | void AddLocalScheme(const char* new_scheme) { |
| 555 | Initialize(); |
| 556 | DoAddScheme(new_scheme, local_schemes); |
| 557 | } |
| 558 | |
| 559 | const std::vector<std::string>& GetLocalSchemes() { |
| 560 | Initialize(); |
| 561 | return *local_schemes; |
| 562 | } |
| 563 | |
| 564 | void AddNoAccessScheme(const char* new_scheme) { |
| 565 | Initialize(); |
| 566 | DoAddScheme(new_scheme, no_access_schemes); |
| 567 | } |
| 568 | |
| 569 | const std::vector<std::string>& GetNoAccessSchemes() { |
| 570 | Initialize(); |
| 571 | return *no_access_schemes; |
| 572 | } |
| 573 | |
| 574 | void AddCORSEnabledScheme(const char* new_scheme) { |
| 575 | Initialize(); |
| 576 | DoAddScheme(new_scheme, cors_enabled_schemes); |
| 577 | } |
| 578 | |
| 579 | const std::vector<std::string>& GetCORSEnabledSchemes() { |
| 580 | Initialize(); |
| 581 | return *cors_enabled_schemes; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 582 | } |
| 583 | |
| 584 | void LockSchemeRegistries() { |
| 585 | scheme_registries_locked = true; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 586 | } |
| 587 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 588 | bool IsStandard(const char* spec, const Component& scheme) { |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 589 | SchemeType unused_scheme_type; |
| 590 | return DoIsStandard(spec, scheme, &unused_scheme_type); |
| 591 | } |
| 592 | |
| 593 | bool GetStandardSchemeType(const char* spec, |
| 594 | const Component& scheme, |
| 595 | SchemeType* type) { |
| 596 | return DoIsStandard(spec, scheme, type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 597 | } |
| 598 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 599 | bool IsStandard(const base::char16* spec, const Component& scheme) { |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 600 | SchemeType unused_scheme_type; |
| 601 | return DoIsStandard(spec, scheme, &unused_scheme_type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 602 | } |
| 603 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 604 | bool IsReferrerScheme(const char* spec, const Component& scheme) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 605 | Initialize(); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 606 | SchemeType unused_scheme_type; |
| 607 | return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes); |
| 608 | } |
| 609 | |
clamy | eff9252 | 2017-01-23 22:48:56 | [diff] [blame] | 610 | bool IsAboutBlank(const GURL& url) { |
| 611 | if (!url.SchemeIs(url::kAboutScheme)) |
| 612 | return false; |
| 613 | |
| 614 | if (url.has_host() || url.has_username() || url.has_password() || |
| 615 | url.has_port()) { |
| 616 | return false; |
| 617 | } |
| 618 | |
| 619 | if (url.path() != kAboutBlankPath && url.path() != kAboutBlankWithHashPath) |
| 620 | return false; |
| 621 | |
| 622 | return true; |
| 623 | } |
| 624 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 625 | bool FindAndCompareScheme(const char* str, |
| 626 | int str_len, |
| 627 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 628 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 629 | return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
| 630 | } |
| 631 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 632 | bool FindAndCompareScheme(const base::char16* str, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 633 | int str_len, |
| 634 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 635 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 636 | return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
| 637 | } |
| 638 | |
pkalinnikov | 054f403 | 2016-08-31 10:54:17 | [diff] [blame] | 639 | bool DomainIs(base::StringPiece canonicalized_host, |
| 640 | base::StringPiece lower_ascii_domain) { |
| 641 | if (canonicalized_host.empty() || lower_ascii_domain.empty()) |
| 642 | return false; |
| 643 | |
| 644 | // If the host name ends with a dot but the input domain doesn't, then we |
| 645 | // ignore the dot in the host name. |
| 646 | size_t host_len = canonicalized_host.length(); |
| 647 | if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.') |
| 648 | --host_len; |
| 649 | |
| 650 | if (host_len < lower_ascii_domain.length()) |
| 651 | return false; |
| 652 | |
| 653 | // |host_first_pos| is the start of the compared part of the host name, not |
| 654 | // start of the whole host name. |
| 655 | const char* host_first_pos = |
| 656 | canonicalized_host.data() + host_len - lower_ascii_domain.length(); |
| 657 | |
| 658 | if (!base::LowerCaseEqualsASCII( |
| 659 | base::StringPiece(host_first_pos, lower_ascii_domain.length()), |
| 660 | lower_ascii_domain)) { |
| 661 | return false; |
| 662 | } |
| 663 | |
| 664 | // Make sure there aren't extra characters in host before the compared part; |
| 665 | // if the host name is longer than the input domain name, then the character |
| 666 | // immediately before the compared part should be a dot. For example, |
| 667 | // www.google.com has domain "google.com", but www.iamnotgoogle.com does not. |
| 668 | if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() && |
| 669 | *(host_first_pos - 1) != '.') { |
| 670 | return false; |
| 671 | } |
| 672 | |
| 673 | return true; |
| 674 | } |
| 675 | |
csharrison | 475851da | 2016-12-17 02:19:42 | [diff] [blame] | 676 | bool HostIsIPAddress(base::StringPiece host) { |
| 677 | url::RawCanonOutputT<char, 128> ignored_output; |
| 678 | url::CanonHostInfo host_info; |
| 679 | url::CanonicalizeIPAddress(host.data(), Component(0, host.length()), |
| 680 | &ignored_output, &host_info); |
| 681 | return host_info.IsIPAddress(); |
| 682 | } |
| 683 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 684 | bool Canonicalize(const char* spec, |
| 685 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 686 | bool trim_path_end, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 687 | CharsetConverter* charset_converter, |
| 688 | CanonOutput* output, |
| 689 | Parsed* output_parsed) { |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 690 | return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 691 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 692 | } |
| 693 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 694 | bool Canonicalize(const base::char16* spec, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 695 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 696 | bool trim_path_end, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 697 | CharsetConverter* charset_converter, |
| 698 | CanonOutput* output, |
| 699 | Parsed* output_parsed) { |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 700 | return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 701 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 702 | } |
| 703 | |
| 704 | bool ResolveRelative(const char* base_spec, |
| 705 | int base_spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 706 | const Parsed& base_parsed, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 707 | const char* relative, |
| 708 | int relative_length, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 709 | CharsetConverter* charset_converter, |
| 710 | CanonOutput* output, |
| 711 | Parsed* output_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 712 | return DoResolveRelative(base_spec, base_spec_len, base_parsed, |
| 713 | relative, relative_length, |
| 714 | charset_converter, output, output_parsed); |
| 715 | } |
| 716 | |
| 717 | bool ResolveRelative(const char* base_spec, |
| 718 | int base_spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 719 | const Parsed& base_parsed, |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 720 | const base::char16* relative, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 721 | int relative_length, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 722 | CharsetConverter* charset_converter, |
| 723 | CanonOutput* output, |
| 724 | Parsed* output_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 725 | return DoResolveRelative(base_spec, base_spec_len, base_parsed, |
| 726 | relative, relative_length, |
| 727 | charset_converter, output, output_parsed); |
| 728 | } |
| 729 | |
| 730 | bool ReplaceComponents(const char* spec, |
| 731 | int spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 732 | const Parsed& parsed, |
| 733 | const Replacements<char>& replacements, |
| 734 | CharsetConverter* charset_converter, |
| 735 | CanonOutput* output, |
| 736 | Parsed* out_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 737 | return DoReplaceComponents(spec, spec_len, parsed, replacements, |
| 738 | charset_converter, output, out_parsed); |
| 739 | } |
| 740 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 741 | bool ReplaceComponents(const char* spec, |
| 742 | int spec_len, |
| 743 | const Parsed& parsed, |
| 744 | const Replacements<base::char16>& replacements, |
| 745 | CharsetConverter* charset_converter, |
| 746 | CanonOutput* output, |
| 747 | Parsed* out_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 748 | return DoReplaceComponents(spec, spec_len, parsed, replacements, |
| 749 | charset_converter, output, out_parsed); |
| 750 | } |
| 751 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 752 | void DecodeURLEscapeSequences(const char* input, |
| 753 | int length, |
| 754 | CanonOutputW* output) { |
| 755 | RawCanonOutputT<char> unescaped_chars; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 756 | for (int i = 0; i < length; i++) { |
| 757 | if (input[i] == '%') { |
| 758 | unsigned char ch; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 759 | if (DecodeEscaped(input, &i, length, &ch)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 760 | unescaped_chars.push_back(ch); |
| 761 | } else { |
| 762 | // Invalid escape sequence, copy the percent literal. |
| 763 | unescaped_chars.push_back('%'); |
| 764 | } |
| 765 | } else { |
| 766 | // Regular non-escaped 8-bit character. |
| 767 | unescaped_chars.push_back(input[i]); |
| 768 | } |
| 769 | } |
| 770 | |
| 771 | // Convert that 8-bit to UTF-16. It's not clear IE does this at all to |
| 772 | // JavaScript URLs, but Firefox and Safari do. |
| 773 | for (int i = 0; i < unescaped_chars.length(); i++) { |
| 774 | unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i)); |
| 775 | if (uch < 0x80) { |
| 776 | // Non-UTF-8, just append directly |
| 777 | output->push_back(uch); |
| 778 | } else { |
| 779 | // next_ch will point to the last character of the decoded |
| 780 | // character. |
| 781 | int next_character = i; |
| 782 | unsigned code_point; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 783 | if (ReadUTFChar(unescaped_chars.data(), &next_character, |
| 784 | unescaped_chars.length(), &code_point)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 785 | // Valid UTF-8 character, convert to UTF-16. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 786 | AppendUTF16Value(code_point, output); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 787 | i = next_character; |
| 788 | } else { |
| 789 | // If there are any sequences that are not valid UTF-8, we keep |
| 790 | // invalid code points and promote to UTF-16. We copy all characters |
| 791 | // from the current position to the end of the identified sequence. |
| 792 | while (i < next_character) { |
| 793 | output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); |
| 794 | i++; |
| 795 | } |
| 796 | output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); |
| 797 | } |
| 798 | } |
| 799 | } |
| 800 | } |
| 801 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 802 | void EncodeURIComponent(const char* input, int length, CanonOutput* output) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 803 | for (int i = 0; i < length; ++i) { |
| 804 | unsigned char c = static_cast<unsigned char>(input[i]); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 805 | if (IsComponentChar(c)) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 806 | output->push_back(c); |
| 807 | else |
| 808 | AppendEscapedChar(c, output); |
| 809 | } |
| 810 | } |
| 811 | |
| 812 | bool CompareSchemeComponent(const char* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 813 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 814 | const char* compare_to) { |
| 815 | return DoCompareSchemeComponent(spec, component, compare_to); |
| 816 | } |
| 817 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 818 | bool CompareSchemeComponent(const base::char16* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 819 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 820 | const char* compare_to) { |
| 821 | return DoCompareSchemeComponent(spec, component, compare_to); |
| 822 | } |
| 823 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 824 | } // namespace url |