[email protected] | 51bcc5d | 2013-04-24 01:41:37 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 4 | |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 5 | #include "url/url_util.h" |
| 6 | |
avi | c0c6031 | 2015-12-21 21:03:50 | [diff] [blame] | 7 | #include <stddef.h> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 8 | #include <string.h> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 9 | |
[email protected] | 8d892fa8 | 2014-07-02 12:42:04 | [diff] [blame] | 10 | #include "base/debug/leak_annotations.h" |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 11 | #include "base/logging.h" |
brettw | bc17d2c8 | 2015-06-09 22:39:08 | [diff] [blame] | 12 | #include "base/strings/string_util.h" |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 13 | #include "url/url_canon_internal.h" |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 14 | #include "url/url_constants.h" |
[email protected] | 318076b | 2013-04-18 21:19:45 | [diff] [blame] | 15 | #include "url/url_file.h" |
| 16 | #include "url/url_util_internal.h" |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 17 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 18 | namespace url { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 19 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 20 | namespace { |
| 21 | |
// Passed to helpers that need to know whether they are responsible for
// stripping whitespace from their input before parsing it.
enum WhitespaceRemovalPolicy {
  // The helper runs its input through RemoveURLWhitespace first.
  REMOVE_WHITESPACE = 0,
  // The helper parses its input exactly as given.
  DO_NOT_REMOVE_WHITESPACE = 1,
};
| 28 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 29 | const SchemeWithType kStandardURLSchemes[] = { |
jww | 0448040 | 2016-10-25 02:50:33 | [diff] [blame] | 30 | {kHttpScheme, SCHEME_WITH_PORT}, |
| 31 | {kHttpsScheme, SCHEME_WITH_PORT}, |
| 32 | // Yes, file URLs can have a hostname, so file URLs should be handled as |
| 33 | // "standard". File URLs never have a port as specified by the SchemeType |
| 34 | // field. |
| 35 | {kFileScheme, SCHEME_WITHOUT_PORT}, |
| 36 | {kFtpScheme, SCHEME_WITH_PORT}, |
| 37 | {kGopherScheme, SCHEME_WITH_PORT}, |
| 38 | {kWsScheme, SCHEME_WITH_PORT}, // WebSocket. |
| 39 | {kWssScheme, SCHEME_WITH_PORT}, // WebSocket secure. |
| 40 | {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY}, |
| 41 | {kHttpSuboriginScheme, SCHEME_WITH_PORT}, |
| 42 | {kHttpsSuboriginScheme, SCHEME_WITH_PORT}, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 43 | }; |
| 44 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 45 | const SchemeWithType kReferrerURLSchemes[] = { |
jww | 0448040 | 2016-10-25 02:50:33 | [diff] [blame] | 46 | {kHttpScheme, SCHEME_WITH_PORT}, |
| 47 | {kHttpsScheme, SCHEME_WITH_PORT}, |
| 48 | {kHttpSuboriginScheme, SCHEME_WITH_PORT}, |
| 49 | {kHttpsSuboriginScheme, SCHEME_WITH_PORT}, |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 50 | }; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 51 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 52 | const char* kSecureSchemes[] = { |
| 53 | kHttpsScheme, |
| 54 | kAboutScheme, |
| 55 | kDataScheme, |
| 56 | kWssScheme, |
| 57 | }; |
| 58 | |
| 59 | const char* kLocalSchemes[] = { |
| 60 | kFileScheme, |
| 61 | }; |
| 62 | |
| 63 | const char* kNoAccessSchemes[] = { |
| 64 | kAboutScheme, |
| 65 | kJavaScriptScheme, |
| 66 | kDataScheme, |
| 67 | }; |
| 68 | |
| 69 | const char* kCORSEnabledSchemes[] = { |
| 70 | kHttpScheme, |
| 71 | kHttpsScheme, |
| 72 | kDataScheme, |
| 73 | }; |
| 74 | |
msramek | 7e5c61f | 2017-02-08 11:21:32 | [diff] [blame] | 75 | const char* kWebStorageSchemes[] = { |
| 76 | kHttpScheme, |
| 77 | kHttpsScheme, |
| 78 | kFileScheme, |
| 79 | kFtpScheme, |
| 80 | kWsScheme, |
| 81 | kWssScheme, |
| 82 | }; |
| 83 | |
jam | cc2df16 | 2017-03-23 23:54:01 | [diff] [blame] | 84 | const char* kEmptyDocumentSchemes[] = { |
| 85 | kAboutScheme, |
| 86 | }; |
| 87 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 88 | bool initialized = false; |
| 89 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 90 | // Lists of the currently installed standard and referrer schemes. These lists |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 91 | // are lazily initialized by Initialize and are leaked on shutdown to prevent |
| 92 | // any destructors from being called that will slow us down or cause problems. |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 93 | std::vector<SchemeWithType>* standard_schemes = nullptr; |
| 94 | std::vector<SchemeWithType>* referrer_schemes = nullptr; |
| 95 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 96 | // Similar to above, initialized by the Init*Schemes methods. |
| 97 | std::vector<std::string>* secure_schemes = nullptr; |
| 98 | std::vector<std::string>* local_schemes = nullptr; |
| 99 | std::vector<std::string>* no_access_schemes = nullptr; |
| 100 | std::vector<std::string>* cors_enabled_schemes = nullptr; |
msramek | 7e5c61f | 2017-02-08 11:21:32 | [diff] [blame] | 101 | std::vector<std::string>* web_storage_schemes = nullptr; |
arthursonzogni | eb73e43 | 2017-02-09 11:54:49 | [diff] [blame] | 102 | std::vector<std::string>* csp_bypassing_schemes = nullptr; |
jam | cc2df16 | 2017-03-23 23:54:01 | [diff] [blame] | 103 | std::vector<std::string>* empty_document_schemes = nullptr; |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 104 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 105 | // See the LockSchemeRegistries declaration in the header. |
| 106 | bool scheme_registries_locked = false; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 107 | |
brettw | 8511167 | 2015-07-23 21:56:35 | [diff] [blame] | 108 | // This template converts a given character type to the corresponding |
| 109 | // StringPiece type. |
| 110 | template<typename CHAR> struct CharToStringPiece { |
| 111 | }; |
| 112 | template<> struct CharToStringPiece<char> { |
| 113 | typedef base::StringPiece Piece; |
| 114 | }; |
| 115 | template<> struct CharToStringPiece<base::char16> { |
| 116 | typedef base::StringPiece16 Piece; |
| 117 | }; |
| 118 | |
// Allocates a new vector holding copies of the first |size| entries of
// |initial_schemes| and stores it in |*schemes|. Whatever |*schemes| pointed
// to before is overwritten without being freed.
void InitSchemes(std::vector<std::string>** schemes,
                 const char** initial_schemes,
                 size_t size) {
  const char** const first = initial_schemes;
  const char** const last = initial_schemes + size;
  *schemes = new std::vector<std::string>(first, last);
}
| 127 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 128 | void InitSchemesWithType(std::vector<SchemeWithType>** schemes, |
| 129 | const SchemeWithType* initial_schemes, |
| 130 | size_t size) { |
| 131 | *schemes = new std::vector<SchemeWithType>(size); |
| 132 | for (size_t i = 0; i < size; i++) { |
| 133 | (*(*schemes))[i] = initial_schemes[i]; |
| 134 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 135 | } |
| 136 | |
| 137 | // Given a string and a range inside the string, compares it to the given |
| 138 | // lower-case |compare_to| buffer. |
| 139 | template<typename CHAR> |
| 140 | inline bool DoCompareSchemeComponent(const CHAR* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 141 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 142 | const char* compare_to) { |
| 143 | if (!component.is_nonempty()) |
| 144 | return compare_to[0] == 0; // When component is empty, match empty scheme. |
brettw | 8511167 | 2015-07-23 21:56:35 | [diff] [blame] | 145 | return base::LowerCaseEqualsASCII( |
| 146 | typename CharToStringPiece<CHAR>::Piece( |
| 147 | &spec[component.begin], component.len), |
| 148 | compare_to); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 149 | } |
| 150 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 151 | // Returns true and sets |type| to the SchemeType of the given scheme |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 152 | // identified by |scheme| within |spec| if in |schemes|. |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 153 | template<typename CHAR> |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 154 | bool DoIsInSchemes(const CHAR* spec, |
| 155 | const Component& scheme, |
| 156 | SchemeType* type, |
| 157 | const std::vector<SchemeWithType>& schemes) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 158 | if (!scheme.is_nonempty()) |
| 159 | return false; // Empty or invalid schemes are non-standard. |
| 160 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 161 | for (const SchemeWithType& scheme_with_type : schemes) { |
| 162 | if (base::LowerCaseEqualsASCII(typename CharToStringPiece<CHAR>::Piece( |
| 163 | &spec[scheme.begin], scheme.len), |
| 164 | scheme_with_type.scheme)) { |
| 165 | *type = scheme_with_type.type; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 166 | return true; |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 167 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 168 | } |
| 169 | return false; |
| 170 | } |
| 171 | |
| 172 | template<typename CHAR> |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 173 | bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 174 | Initialize(); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 175 | return DoIsInSchemes(spec, scheme, type, *standard_schemes); |
| 176 | } |
| 177 | |
| 178 | |
| 179 | template<typename CHAR> |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 180 | bool DoFindAndCompareScheme(const CHAR* str, |
| 181 | int str_len, |
| 182 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 183 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 184 | // Before extracting scheme, canonicalize the URL to remove any whitespace. |
| 185 | // This matches the canonicalization done in DoCanonicalize function. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 186 | RawCanonOutputT<CHAR> whitespace_buffer; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 187 | int spec_len; |
Mike West | 9e5ae90 | 2017-05-24 15:17:50 | [diff] [blame] | 188 | const CHAR* spec = |
| 189 | RemoveURLWhitespace(str, str_len, &whitespace_buffer, &spec_len, nullptr); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 190 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 191 | Component our_scheme; |
| 192 | if (!ExtractScheme(spec, spec_len, &our_scheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 193 | // No scheme. |
| 194 | if (found_scheme) |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 195 | *found_scheme = Component(); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 196 | return false; |
| 197 | } |
| 198 | if (found_scheme) |
| 199 | *found_scheme = our_scheme; |
| 200 | return DoCompareSchemeComponent(spec, our_scheme, compare); |
| 201 | } |
| 202 | |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 203 | template <typename CHAR> |
| 204 | bool DoCanonicalize(const CHAR* spec, |
| 205 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 206 | bool trim_path_end, |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 207 | WhitespaceRemovalPolicy whitespace_policy, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 208 | CharsetConverter* charset_converter, |
| 209 | CanonOutput* output, |
| 210 | Parsed* output_parsed) { |
csharrison | 60e6ff0e | 2017-01-31 23:59:29 | [diff] [blame] | 211 | output->ReserveSizeIfNeeded(spec_len); |
csharrison | 96b890e5 | 2017-01-19 00:13:34 | [diff] [blame] | 212 | |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 213 | // Remove any whitespace from the middle of the relative URL if necessary. |
| 214 | // Possibly this will result in copying to the new buffer. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 215 | RawCanonOutputT<CHAR> whitespace_buffer; |
mkwst | 41318f4 | 2017-01-19 15:11:50 | [diff] [blame] | 216 | if (whitespace_policy == REMOVE_WHITESPACE) { |
Mike West | 9e5ae90 | 2017-05-24 15:17:50 | [diff] [blame] | 217 | spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len, |
| 218 | &output_parsed->potentially_dangling_markup); |
mkwst | 41318f4 | 2017-01-19 15:11:50 | [diff] [blame] | 219 | } |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 220 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 221 | Parsed parsed_input; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 222 | #ifdef WIN32 |
| 223 | // For Windows, we allow things that look like absolute Windows paths to be |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 224 | // fixed up magically to file URLs. This is done for IE compatibility. For |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 225 | // example, this will change "c:/foo" into a file URL rather than treating |
| 226 | // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
| 227 | // There is similar logic in url_canon_relative.cc for |
| 228 | // |
| 229 | // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which |
| 230 | // has no meaning as an absolute path name. This is because browsers on Mac |
| 231 | // & Unix don't generally do this, so there is no compatibility reason for |
| 232 | // doing so. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 233 | if (DoesBeginUNCPath(spec, 0, spec_len, false) || |
| 234 | DoesBeginWindowsDriveSpec(spec, 0, spec_len)) { |
| 235 | ParseFileURL(spec, spec_len, &parsed_input); |
| 236 | return CanonicalizeFileURL(spec, spec_len, parsed_input, charset_converter, |
| 237 | output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 238 | } |
| 239 | #endif |
| 240 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 241 | Component scheme; |
| 242 | if (!ExtractScheme(spec, spec_len, &scheme)) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 243 | return false; |
| 244 | |
| 245 | // This is the parsed version of the input URL, we have to canonicalize it |
| 246 | // before storing it in our object. |
| 247 | bool success; |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 248 | SchemeType unused_scheme_type = SCHEME_WITH_PORT; |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 249 | if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 250 | // File URLs are special. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 251 | ParseFileURL(spec, spec_len, &parsed_input); |
| 252 | success = CanonicalizeFileURL(spec, spec_len, parsed_input, |
| 253 | charset_converter, output, output_parsed); |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 254 | } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 255 | // Filesystem URLs are special. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 256 | ParseFileSystemURL(spec, spec_len, &parsed_input); |
| 257 | success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input, |
| 258 | charset_converter, output, |
| 259 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 260 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 261 | } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 262 | // All "normal" URLs. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 263 | ParseStandardURL(spec, spec_len, &parsed_input); |
| 264 | success = CanonicalizeStandardURL(spec, spec_len, parsed_input, |
| 265 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 266 | |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 267 | } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 268 | // Mailto URLs are treated like standard URLs, with only a scheme, path, |
| 269 | // and query. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 270 | ParseMailtoURL(spec, spec_len, &parsed_input); |
| 271 | success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output, |
| 272 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 273 | |
| 274 | } else { |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 275 | // "Weird" URLs like data: and javascript:. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 276 | ParsePathURL(spec, spec_len, trim_path_end, &parsed_input); |
| 277 | success = CanonicalizePathURL(spec, spec_len, parsed_input, output, |
| 278 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 279 | } |
| 280 | return success; |
| 281 | } |
| 282 | |
| 283 | template<typename CHAR> |
| 284 | bool DoResolveRelative(const char* base_spec, |
| 285 | int base_spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 286 | const Parsed& base_parsed, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 287 | const CHAR* in_relative, |
| 288 | int in_relative_length, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 289 | CharsetConverter* charset_converter, |
| 290 | CanonOutput* output, |
| 291 | Parsed* output_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 292 | // Remove any whitespace from the middle of the relative URL, possibly |
| 293 | // copying to the new buffer. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 294 | RawCanonOutputT<CHAR> whitespace_buffer; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 295 | int relative_length; |
Mike West | 9e5ae90 | 2017-05-24 15:17:50 | [diff] [blame] | 296 | const CHAR* relative = RemoveURLWhitespace( |
| 297 | in_relative, in_relative_length, &whitespace_buffer, &relative_length, |
| 298 | &output_parsed->potentially_dangling_markup); |
mkwst | 41318f4 | 2017-01-19 15:11:50 | [diff] [blame] | 299 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 300 | bool base_is_authority_based = false; |
| 301 | bool base_is_hierarchical = false; |
| 302 | if (base_spec && |
| 303 | base_parsed.scheme.is_nonempty()) { |
| 304 | int after_scheme = base_parsed.scheme.end() + 1; // Skip past the colon. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 305 | int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme, |
| 306 | base_spec_len); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 307 | base_is_authority_based = num_slashes > 1; |
| 308 | base_is_hierarchical = num_slashes > 0; |
| 309 | } |
| 310 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 311 | SchemeType unused_scheme_type = SCHEME_WITH_PORT; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 312 | bool standard_base_scheme = |
| 313 | base_parsed.scheme.is_nonempty() && |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 314 | DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 315 | |
| 316 | bool is_relative; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 317 | Component relative_component; |
| 318 | if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length, |
| 319 | (base_is_hierarchical || standard_base_scheme), |
| 320 | &is_relative, &relative_component)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 321 | // Error resolving. |
| 322 | return false; |
| 323 | } |
| 324 | |
csharrison | 96b890e5 | 2017-01-19 00:13:34 | [diff] [blame] | 325 | // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and |
| 326 | // ReserveRelativeURL, to enable more accurate buffer sizes. |
| 327 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 328 | // Pretend for a moment that |base_spec| is a standard URL. Normally |
| 329 | // non-standard URLs are treated as PathURLs, but if the base has an |
| 330 | // authority we would like to preserve it. |
| 331 | if (is_relative && base_is_authority_based && !standard_base_scheme) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 332 | Parsed base_parsed_authority; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 333 | ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority); |
| 334 | if (base_parsed_authority.host.is_nonempty()) { |
zherczeg.u-szeged | 1e2171c | 2014-12-04 11:52:36 | [diff] [blame] | 335 | RawCanonOutputT<char> temporary_output; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 336 | bool did_resolve_succeed = |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 337 | ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, |
zherczeg.u-szeged | 1e2171c | 2014-12-04 11:52:36 | [diff] [blame] | 338 | relative_component, charset_converter, |
| 339 | &temporary_output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 340 | // The output_parsed is incorrect at this point (because it was built |
| 341 | // based on base_parsed_authority instead of base_parsed) and needs to be |
| 342 | // re-created. |
zherczeg.u-szeged | 1e2171c | 2014-12-04 11:52:36 | [diff] [blame] | 343 | DoCanonicalize(temporary_output.data(), temporary_output.length(), true, |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 344 | REMOVE_WHITESPACE, charset_converter, output, |
| 345 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 346 | return did_resolve_succeed; |
| 347 | } |
| 348 | } else if (is_relative) { |
| 349 | // Relative, resolve and canonicalize. |
| 350 | bool file_base_scheme = base_parsed.scheme.is_nonempty() && |
| 351 | DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 352 | return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, |
| 353 | relative_component, charset_converter, output, |
| 354 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 355 | } |
| 356 | |
| 357 | // Not relative, canonicalize the input. |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 358 | return DoCanonicalize(relative, relative_length, true, |
| 359 | DO_NOT_REMOVE_WHITESPACE, charset_converter, output, |
| 360 | output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 361 | } |
| 362 | |
| 363 | template<typename CHAR> |
| 364 | bool DoReplaceComponents(const char* spec, |
| 365 | int spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 366 | const Parsed& parsed, |
| 367 | const Replacements<CHAR>& replacements, |
| 368 | CharsetConverter* charset_converter, |
| 369 | CanonOutput* output, |
| 370 | Parsed* out_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 371 | // If the scheme is overridden, just do a simple string substitution and |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 372 | // re-parse the whole thing. There are lots of edge cases that we really don't |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 373 | // want to deal with. Like what happens if I replace "http://e:8080/foo" |
| 374 | // with a file. Does it become "file:///E:/8080/foo" where the port number |
| 375 | // becomes part of the path? Parsing that string as a file URL says "yes" |
| 376 | // but almost no sane rule for dealing with the components individually would |
| 377 | // come up with that. |
| 378 | // |
| 379 | // Why allow these crazy cases at all? Programatically, there is almost no |
| 380 | // case for replacing the scheme. The most common case for hitting this is |
| 381 | // in JS when building up a URL using the location object. In this case, the |
| 382 | // JS code expects the string substitution behavior: |
| 383 | // http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3 |
| 384 | if (replacements.IsSchemeOverridden()) { |
| 385 | // Canonicalize the new scheme so it is 8-bit and can be concatenated with |
| 386 | // the existing spec. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 387 | RawCanonOutput<128> scheme_replaced; |
| 388 | Component scheme_replaced_parsed; |
| 389 | CanonicalizeScheme(replacements.sources().scheme, |
| 390 | replacements.components().scheme, |
| 391 | &scheme_replaced, &scheme_replaced_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 392 | |
| 393 | // We can assume that the input is canonicalized, which means it always has |
| 394 | // a colon after the scheme (or where the scheme would be). |
| 395 | int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1 |
| 396 | : 1; |
| 397 | if (spec_len - spec_after_colon > 0) { |
| 398 | scheme_replaced.Append(&spec[spec_after_colon], |
| 399 | spec_len - spec_after_colon); |
| 400 | } |
| 401 | |
| 402 | // We now need to completely re-parse the resulting string since its meaning |
| 403 | // may have changed with the different scheme. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 404 | RawCanonOutput<128> recanonicalized; |
| 405 | Parsed recanonicalized_parsed; |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 406 | DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 407 | REMOVE_WHITESPACE, charset_converter, &recanonicalized, |
| 408 | &recanonicalized_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 409 | |
| 410 | // Recurse using the version with the scheme already replaced. This will now |
| 411 | // use the replacement rules for the new scheme. |
| 412 | // |
| 413 | // Warning: this code assumes that ReplaceComponents will re-check all |
| 414 | // components for validity. This is because we can't fail if DoCanonicalize |
| 415 | // failed above since theoretically the thing making it fail could be |
| 416 | // getting replaced here. If ReplaceComponents didn't re-check everything, |
| 417 | // we wouldn't know if something *not* getting replaced is a problem. |
| 418 | // If the scheme-specific replacers are made more intelligent so they don't |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 419 | // re-check everything, we should instead re-canonicalize the whole thing |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 420 | // after this call to check validity (this assumes replacing the scheme is |
| 421 | // much much less common than other types of replacements, like clearing the |
| 422 | // ref). |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 423 | Replacements<CHAR> replacements_no_scheme = replacements; |
| 424 | replacements_no_scheme.SetScheme(NULL, Component()); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 425 | return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(), |
| 426 | recanonicalized_parsed, replacements_no_scheme, |
| 427 | charset_converter, output, out_parsed); |
| 428 | } |
| 429 | |
csharrison | 96b890e5 | 2017-01-19 00:13:34 | [diff] [blame] | 430 | // TODO(csharrison): We could be smarter about size to reserve if this is done |
| 431 | // in callers below, and the code checks to see which components are being |
| 432 | // replaced, and with what length. If this ends up being a hot spot it should |
| 433 | // be changed. |
csharrison | 60e6ff0e | 2017-01-31 23:59:29 | [diff] [blame] | 434 | output->ReserveSizeIfNeeded(spec_len); |
csharrison | 96b890e5 | 2017-01-19 00:13:34 | [diff] [blame] | 435 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 436 | // If we get here, then we know the scheme doesn't need to be replaced, so can |
| 437 | // just key off the scheme in the spec to know how to do the replacements. |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 438 | if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 439 | return ReplaceFileURL(spec, parsed, replacements, charset_converter, output, |
| 440 | out_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 441 | } |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 442 | if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 443 | return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter, |
| 444 | output, out_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 445 | } |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 446 | SchemeType unused_scheme_type = SCHEME_WITH_PORT; |
| 447 | if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 448 | return ReplaceStandardURL(spec, parsed, replacements, charset_converter, |
| 449 | output, out_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 450 | } |
[email protected] | cca6f39 | 2014-05-28 21:32:26 | [diff] [blame] | 451 | if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) { |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 452 | return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 453 | } |
| 454 | |
| 455 | // Default is a path URL. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 456 | return ReplacePathURL(spec, parsed, replacements, output, out_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 457 | } |
| 458 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 459 | void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) { |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 460 | DCHECK(schemes); |
| 461 | // If this assert triggers, it means you've called Add*Scheme after |
| 462 | // LockSchemeRegistries has been called (see the header file for |
| 463 | // LockSchemeRegistries for more). |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 464 | // |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 465 | // This normally means you're trying to set up a new scheme too late in your |
| 466 | // application's init process. Locate where your app does this initialization |
| 467 | // and calls LockSchemeRegistries, and add your new scheme there. |
| 468 | DCHECK(!scheme_registries_locked) |
| 469 | << "Trying to add a scheme after the lists have been locked."; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 470 | |
| 471 | size_t scheme_len = strlen(new_scheme); |
| 472 | if (scheme_len == 0) |
| 473 | return; |
| 474 | |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 475 | DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); |
| 476 | schemes->push_back(std::string(new_scheme)); |
| 477 | } |
| 478 | |
| 479 | void DoAddSchemeWithType(const char* new_scheme, |
| 480 | SchemeType type, |
| 481 | std::vector<SchemeWithType>* schemes) { |
| 482 | DCHECK(schemes); |
| 483 | // If this assert triggers, it means you've called Add*Scheme after |
| 484 | // LockSchemeRegistries has been called (see the header file for |
| 485 | // LockSchemeRegistries for more). |
| 486 | // |
| 487 | // This normally means you're trying to set up a new scheme too late in your |
| 488 | // application's init process. Locate where your app does this initialization |
| 489 | // and calls LockSchemeRegistries, and add your new scheme there. |
| 490 | DCHECK(!scheme_registries_locked) |
| 491 | << "Trying to add a scheme after the lists have been locked."; |
| 492 | |
| 493 | size_t scheme_len = strlen(new_scheme); |
| 494 | if (scheme_len == 0) |
| 495 | return; |
| 496 | |
| 497 | DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme); |
qyearsley | 2bc727d | 2015-08-14 20:17:15 | [diff] [blame] | 498 | // Duplicate the scheme into a new buffer and add it to the list of standard |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 499 | // schemes. This pointer will be leaked on shutdown. |
| 500 | char* dup_scheme = new char[scheme_len + 1]; |
[email protected] | 8d892fa8 | 2014-07-02 12:42:04 | [diff] [blame] | 501 | ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 502 | memcpy(dup_scheme, new_scheme, scheme_len + 1); |
| 503 | |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 504 | SchemeWithType scheme_with_type; |
| 505 | scheme_with_type.scheme = dup_scheme; |
| 506 | scheme_with_type.type = type; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 507 | schemes->push_back(scheme_with_type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 508 | } |
| 509 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 510 | } // namespace |
| 511 | |
| 512 | void Initialize() { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 513 | if (initialized) |
| 514 | return; |
| 515 | InitSchemesWithType(&standard_schemes, kStandardURLSchemes, |
| 516 | arraysize(kStandardURLSchemes)); |
| 517 | InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes, |
| 518 | arraysize(kReferrerURLSchemes)); |
| 519 | InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes)); |
| 520 | InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes)); |
| 521 | InitSchemes(&no_access_schemes, kNoAccessSchemes, |
| 522 | arraysize(kNoAccessSchemes)); |
| 523 | InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes, |
| 524 | arraysize(kCORSEnabledSchemes)); |
msramek | 7e5c61f | 2017-02-08 11:21:32 | [diff] [blame] | 525 | InitSchemes(&web_storage_schemes, kWebStorageSchemes, |
| 526 | arraysize(kWebStorageSchemes)); |
arthursonzogni | eb73e43 | 2017-02-09 11:54:49 | [diff] [blame] | 527 | InitSchemes(&csp_bypassing_schemes, nullptr, 0); |
jam | cc2df16 | 2017-03-23 23:54:01 | [diff] [blame] | 528 | InitSchemes(&empty_document_schemes, kEmptyDocumentSchemes, |
| 529 | arraysize(kEmptyDocumentSchemes)); |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 530 | initialized = true; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 531 | } |
| 532 | |
| 533 | void Shutdown() { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 534 | initialized = false; |
| 535 | delete standard_schemes; |
| 536 | standard_schemes = nullptr; |
| 537 | delete referrer_schemes; |
| 538 | referrer_schemes = nullptr; |
| 539 | delete secure_schemes; |
| 540 | secure_schemes = nullptr; |
| 541 | delete local_schemes; |
| 542 | local_schemes = nullptr; |
| 543 | delete no_access_schemes; |
| 544 | no_access_schemes = nullptr; |
| 545 | delete cors_enabled_schemes; |
| 546 | cors_enabled_schemes = nullptr; |
msramek | 7e5c61f | 2017-02-08 11:21:32 | [diff] [blame] | 547 | delete web_storage_schemes; |
| 548 | web_storage_schemes = nullptr; |
arthursonzogni | eb73e43 | 2017-02-09 11:54:49 | [diff] [blame] | 549 | delete csp_bypassing_schemes; |
| 550 | csp_bypassing_schemes = nullptr; |
jam | cc2df16 | 2017-03-23 23:54:01 | [diff] [blame] | 551 | delete empty_document_schemes; |
| 552 | empty_document_schemes = nullptr; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 553 | } |
| 554 | |
| 555 | void AddStandardScheme(const char* new_scheme, SchemeType type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 556 | Initialize(); |
| 557 | DoAddSchemeWithType(new_scheme, type, standard_schemes); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 558 | } |
| 559 | |
| 560 | void AddReferrerScheme(const char* new_scheme, SchemeType type) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 561 | Initialize(); |
| 562 | DoAddSchemeWithType(new_scheme, type, referrer_schemes); |
| 563 | } |
| 564 | |
| 565 | void AddSecureScheme(const char* new_scheme) { |
| 566 | Initialize(); |
| 567 | DoAddScheme(new_scheme, secure_schemes); |
| 568 | } |
| 569 | |
| 570 | const std::vector<std::string>& GetSecureSchemes() { |
| 571 | Initialize(); |
| 572 | return *secure_schemes; |
| 573 | } |
| 574 | |
| 575 | void AddLocalScheme(const char* new_scheme) { |
| 576 | Initialize(); |
| 577 | DoAddScheme(new_scheme, local_schemes); |
| 578 | } |
| 579 | |
| 580 | const std::vector<std::string>& GetLocalSchemes() { |
| 581 | Initialize(); |
| 582 | return *local_schemes; |
| 583 | } |
| 584 | |
| 585 | void AddNoAccessScheme(const char* new_scheme) { |
| 586 | Initialize(); |
| 587 | DoAddScheme(new_scheme, no_access_schemes); |
| 588 | } |
| 589 | |
| 590 | const std::vector<std::string>& GetNoAccessSchemes() { |
| 591 | Initialize(); |
| 592 | return *no_access_schemes; |
| 593 | } |
| 594 | |
| 595 | void AddCORSEnabledScheme(const char* new_scheme) { |
| 596 | Initialize(); |
| 597 | DoAddScheme(new_scheme, cors_enabled_schemes); |
| 598 | } |
| 599 | |
| 600 | const std::vector<std::string>& GetCORSEnabledSchemes() { |
| 601 | Initialize(); |
| 602 | return *cors_enabled_schemes; |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 603 | } |
| 604 | |
msramek | 7e5c61f | 2017-02-08 11:21:32 | [diff] [blame] | 605 | void AddWebStorageScheme(const char* new_scheme) { |
| 606 | Initialize(); |
| 607 | DoAddScheme(new_scheme, web_storage_schemes); |
| 608 | } |
| 609 | |
| 610 | const std::vector<std::string>& GetWebStorageSchemes() { |
| 611 | Initialize(); |
| 612 | return *web_storage_schemes; |
| 613 | } |
| 614 | |
arthursonzogni | eb73e43 | 2017-02-09 11:54:49 | [diff] [blame] | 615 | void AddCSPBypassingScheme(const char* new_scheme) { |
| 616 | Initialize(); |
| 617 | DoAddScheme(new_scheme, csp_bypassing_schemes); |
| 618 | } |
| 619 | |
| 620 | const std::vector<std::string>& GetCSPBypassingSchemes() { |
| 621 | Initialize(); |
| 622 | return *csp_bypassing_schemes; |
| 623 | } |
| 624 | |
jam | cc2df16 | 2017-03-23 23:54:01 | [diff] [blame] | 625 | void AddEmptyDocumentScheme(const char* new_scheme) { |
| 626 | Initialize(); |
| 627 | DoAddScheme(new_scheme, empty_document_schemes); |
| 628 | } |
| 629 | |
| 630 | const std::vector<std::string>& GetEmptyDocumentSchemes() { |
| 631 | Initialize(); |
| 632 | return *empty_document_schemes; |
| 633 | } |
| 634 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 635 | void LockSchemeRegistries() { |
| 636 | scheme_registries_locked = true; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 637 | } |
| 638 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 639 | bool IsStandard(const char* spec, const Component& scheme) { |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 640 | SchemeType unused_scheme_type; |
| 641 | return DoIsStandard(spec, scheme, &unused_scheme_type); |
| 642 | } |
| 643 | |
| 644 | bool GetStandardSchemeType(const char* spec, |
| 645 | const Component& scheme, |
| 646 | SchemeType* type) { |
| 647 | return DoIsStandard(spec, scheme, type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 648 | } |
| 649 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 650 | bool IsStandard(const base::char16* spec, const Component& scheme) { |
tyoshino | 11a7c9fe | 2015-08-19 08:51:46 | [diff] [blame] | 651 | SchemeType unused_scheme_type; |
| 652 | return DoIsStandard(spec, scheme, &unused_scheme_type); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 653 | } |
| 654 | |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 655 | bool IsReferrerScheme(const char* spec, const Component& scheme) { |
jam | 0901535 | 2017-01-19 01:49:02 | [diff] [blame] | 656 | Initialize(); |
lizeb | 5120f6dc | 2016-02-19 09:29:44 | [diff] [blame] | 657 | SchemeType unused_scheme_type; |
| 658 | return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes); |
| 659 | } |
| 660 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 661 | bool FindAndCompareScheme(const char* str, |
| 662 | int str_len, |
| 663 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 664 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 665 | return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
| 666 | } |
| 667 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 668 | bool FindAndCompareScheme(const base::char16* str, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 669 | int str_len, |
| 670 | const char* compare, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 671 | Component* found_scheme) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 672 | return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
| 673 | } |
| 674 | |
pkalinnikov | 054f403 | 2016-08-31 10:54:17 | [diff] [blame] | 675 | bool DomainIs(base::StringPiece canonicalized_host, |
| 676 | base::StringPiece lower_ascii_domain) { |
| 677 | if (canonicalized_host.empty() || lower_ascii_domain.empty()) |
| 678 | return false; |
| 679 | |
| 680 | // If the host name ends with a dot but the input domain doesn't, then we |
| 681 | // ignore the dot in the host name. |
| 682 | size_t host_len = canonicalized_host.length(); |
| 683 | if (canonicalized_host.back() == '.' && lower_ascii_domain.back() != '.') |
| 684 | --host_len; |
| 685 | |
| 686 | if (host_len < lower_ascii_domain.length()) |
| 687 | return false; |
| 688 | |
| 689 | // |host_first_pos| is the start of the compared part of the host name, not |
| 690 | // start of the whole host name. |
| 691 | const char* host_first_pos = |
| 692 | canonicalized_host.data() + host_len - lower_ascii_domain.length(); |
| 693 | |
| 694 | if (!base::LowerCaseEqualsASCII( |
| 695 | base::StringPiece(host_first_pos, lower_ascii_domain.length()), |
| 696 | lower_ascii_domain)) { |
| 697 | return false; |
| 698 | } |
| 699 | |
| 700 | // Make sure there aren't extra characters in host before the compared part; |
| 701 | // if the host name is longer than the input domain name, then the character |
| 702 | // immediately before the compared part should be a dot. For example, |
| 703 | // www.google.com has domain "google.com", but www.iamnotgoogle.com does not. |
| 704 | if (lower_ascii_domain[0] != '.' && host_len > lower_ascii_domain.length() && |
| 705 | *(host_first_pos - 1) != '.') { |
| 706 | return false; |
| 707 | } |
| 708 | |
| 709 | return true; |
| 710 | } |
| 711 | |
csharrison | 475851da | 2016-12-17 02:19:42 | [diff] [blame] | 712 | bool HostIsIPAddress(base::StringPiece host) { |
| 713 | url::RawCanonOutputT<char, 128> ignored_output; |
| 714 | url::CanonHostInfo host_info; |
| 715 | url::CanonicalizeIPAddress(host.data(), Component(0, host.length()), |
| 716 | &ignored_output, &host_info); |
| 717 | return host_info.IsIPAddress(); |
| 718 | } |
| 719 | |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 720 | bool Canonicalize(const char* spec, |
| 721 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 722 | bool trim_path_end, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 723 | CharsetConverter* charset_converter, |
| 724 | CanonOutput* output, |
| 725 | Parsed* output_parsed) { |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 726 | return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 727 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 728 | } |
| 729 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 730 | bool Canonicalize(const base::char16* spec, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 731 | int spec_len, |
[email protected] | 369e84f7 | 2013-11-23 01:53:52 | [diff] [blame] | 732 | bool trim_path_end, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 733 | CharsetConverter* charset_converter, |
| 734 | CanonOutput* output, |
| 735 | Parsed* output_parsed) { |
csharrison | c6453720 | 2016-12-01 14:15:14 | [diff] [blame] | 736 | return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 737 | charset_converter, output, output_parsed); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 738 | } |
| 739 | |
| 740 | bool ResolveRelative(const char* base_spec, |
| 741 | int base_spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 742 | const Parsed& base_parsed, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 743 | const char* relative, |
| 744 | int relative_length, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 745 | CharsetConverter* charset_converter, |
| 746 | CanonOutput* output, |
| 747 | Parsed* output_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 748 | return DoResolveRelative(base_spec, base_spec_len, base_parsed, |
| 749 | relative, relative_length, |
| 750 | charset_converter, output, output_parsed); |
| 751 | } |
| 752 | |
| 753 | bool ResolveRelative(const char* base_spec, |
| 754 | int base_spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 755 | const Parsed& base_parsed, |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 756 | const base::char16* relative, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 757 | int relative_length, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 758 | CharsetConverter* charset_converter, |
| 759 | CanonOutput* output, |
| 760 | Parsed* output_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 761 | return DoResolveRelative(base_spec, base_spec_len, base_parsed, |
| 762 | relative, relative_length, |
| 763 | charset_converter, output, output_parsed); |
| 764 | } |
| 765 | |
| 766 | bool ReplaceComponents(const char* spec, |
| 767 | int spec_len, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 768 | const Parsed& parsed, |
| 769 | const Replacements<char>& replacements, |
| 770 | CharsetConverter* charset_converter, |
| 771 | CanonOutput* output, |
| 772 | Parsed* out_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 773 | return DoReplaceComponents(spec, spec_len, parsed, replacements, |
| 774 | charset_converter, output, out_parsed); |
| 775 | } |
| 776 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 777 | bool ReplaceComponents(const char* spec, |
| 778 | int spec_len, |
| 779 | const Parsed& parsed, |
| 780 | const Replacements<base::char16>& replacements, |
| 781 | CharsetConverter* charset_converter, |
| 782 | CanonOutput* output, |
| 783 | Parsed* out_parsed) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 784 | return DoReplaceComponents(spec, spec_len, parsed, replacements, |
| 785 | charset_converter, output, out_parsed); |
| 786 | } |
| 787 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 788 | void DecodeURLEscapeSequences(const char* input, |
| 789 | int length, |
| 790 | CanonOutputW* output) { |
| 791 | RawCanonOutputT<char> unescaped_chars; |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 792 | for (int i = 0; i < length; i++) { |
| 793 | if (input[i] == '%') { |
| 794 | unsigned char ch; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 795 | if (DecodeEscaped(input, &i, length, &ch)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 796 | unescaped_chars.push_back(ch); |
| 797 | } else { |
| 798 | // Invalid escape sequence, copy the percent literal. |
| 799 | unescaped_chars.push_back('%'); |
| 800 | } |
| 801 | } else { |
| 802 | // Regular non-escaped 8-bit character. |
| 803 | unescaped_chars.push_back(input[i]); |
| 804 | } |
| 805 | } |
| 806 | |
| 807 | // Convert that 8-bit to UTF-16. It's not clear IE does this at all to |
| 808 | // JavaScript URLs, but Firefox and Safari do. |
| 809 | for (int i = 0; i < unescaped_chars.length(); i++) { |
| 810 | unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i)); |
| 811 | if (uch < 0x80) { |
| 812 | // Non-UTF-8, just append directly |
| 813 | output->push_back(uch); |
| 814 | } else { |
| 815 | // next_ch will point to the last character of the decoded |
| 816 | // character. |
| 817 | int next_character = i; |
| 818 | unsigned code_point; |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 819 | if (ReadUTFChar(unescaped_chars.data(), &next_character, |
| 820 | unescaped_chars.length(), &code_point)) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 821 | // Valid UTF-8 character, convert to UTF-16. |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 822 | AppendUTF16Value(code_point, output); |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 823 | i = next_character; |
| 824 | } else { |
| 825 | // If there are any sequences that are not valid UTF-8, we keep |
| 826 | // invalid code points and promote to UTF-16. We copy all characters |
| 827 | // from the current position to the end of the identified sequence. |
| 828 | while (i < next_character) { |
| 829 | output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); |
| 830 | i++; |
| 831 | } |
| 832 | output->push_back(static_cast<unsigned char>(unescaped_chars.at(i))); |
| 833 | } |
| 834 | } |
| 835 | } |
| 836 | } |
| 837 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 838 | void EncodeURIComponent(const char* input, int length, CanonOutput* output) { |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 839 | for (int i = 0; i < length; ++i) { |
| 840 | unsigned char c = static_cast<unsigned char>(input[i]); |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 841 | if (IsComponentChar(c)) |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 842 | output->push_back(c); |
| 843 | else |
| 844 | AppendEscapedChar(c, output); |
| 845 | } |
| 846 | } |
| 847 | |
| 848 | bool CompareSchemeComponent(const char* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 849 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 850 | const char* compare_to) { |
| 851 | return DoCompareSchemeComponent(spec, component, compare_to); |
| 852 | } |
| 853 | |
[email protected] | 3774f83 | 2013-06-11 21:21:57 | [diff] [blame] | 854 | bool CompareSchemeComponent(const base::char16* spec, |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 855 | const Component& component, |
[email protected] | e7bba5f8 | 2013-04-10 20:10:52 | [diff] [blame] | 856 | const char* compare_to) { |
| 857 | return DoCompareSchemeComponent(spec, component, compare_to); |
| 858 | } |
| 859 | |
[email protected] | 0318f92 | 2014-04-22 00:09:23 | [diff] [blame] | 860 | } // namespace url |