Avi Drissman | 6459548 | 2022-09-14 20:52:29 | [diff] [blame] | 1 | // Copyright 2021 The Chromium Authors |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "net/base/proxy_string_util.h" |
| 6 | |
| 7 | #include <string> |
| 8 | |
| 9 | #include "base/notreached.h" |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 10 | #include "base/strings/strcat.h" |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 11 | #include "base/strings/string_piece.h" |
| 12 | #include "base/strings/string_util.h" |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 13 | #include "net/base/proxy_server.h" |
| 14 | #include "net/base/url_util.h" |
| 15 | #include "net/http/http_util.h" |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 16 | #include "url/third_party/mozilla/url_parse.h" |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 17 | |
| 18 | namespace net { |
| 19 | |
| 20 | namespace { |
| 21 | |
| 22 | // Parses the proxy type from a PAC string, to a ProxyServer::Scheme. |
| 23 | // This mapping is case-insensitive. If no type could be matched |
| 24 | // returns SCHEME_INVALID. |
| 25 | ProxyServer::Scheme GetSchemeFromPacTypeInternal(base::StringPiece type) { |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 26 | if (base::EqualsCaseInsensitiveASCII(type, "proxy")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 27 | return ProxyServer::SCHEME_HTTP; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 28 | if (base::EqualsCaseInsensitiveASCII(type, "socks")) { |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 29 | // Default to v4 for compatibility. This is because the SOCKS4 vs SOCKS5 |
| 30 | // notation didn't originally exist, so if a client returns SOCKS they |
| 31 | // really meant SOCKS4. |
| 32 | return ProxyServer::SCHEME_SOCKS4; |
| 33 | } |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 34 | if (base::EqualsCaseInsensitiveASCII(type, "socks4")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 35 | return ProxyServer::SCHEME_SOCKS4; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 36 | if (base::EqualsCaseInsensitiveASCII(type, "socks5")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 37 | return ProxyServer::SCHEME_SOCKS5; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 38 | if (base::EqualsCaseInsensitiveASCII(type, "direct")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 39 | return ProxyServer::SCHEME_DIRECT; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 40 | if (base::EqualsCaseInsensitiveASCII(type, "https")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 41 | return ProxyServer::SCHEME_HTTPS; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 42 | if (base::EqualsCaseInsensitiveASCII(type, "quic")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 43 | return ProxyServer::SCHEME_QUIC; |
| 44 | |
| 45 | return ProxyServer::SCHEME_INVALID; |
| 46 | } |
| 47 | |
| 48 | ProxyServer FromSchemeHostAndPort(ProxyServer::Scheme scheme, |
| 49 | base::StringPiece host_and_port) { |
| 50 | // Trim leading/trailing space. |
| 51 | host_and_port = HttpUtil::TrimLWS(host_and_port); |
| 52 | |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 53 | if (scheme == ProxyServer::SCHEME_INVALID) |
| 54 | return ProxyServer(); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 55 | |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 56 | if (scheme == ProxyServer::SCHEME_DIRECT) { |
| 57 | if (!host_and_port.empty()) |
| 58 | return ProxyServer(); // Invalid -- DIRECT cannot have a host/port. |
| 59 | return ProxyServer::Direct(); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 60 | } |
| 61 | |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 62 | url::Component username_component; |
| 63 | url::Component password_component; |
| 64 | url::Component hostname_component; |
| 65 | url::Component port_component; |
| 66 | url::ParseAuthority(host_and_port.data(), |
| 67 | url::Component(0, host_and_port.size()), |
| 68 | &username_component, &password_component, |
| 69 | &hostname_component, &port_component); |
| 70 | if (username_component.is_valid() || password_component.is_valid() || |
Tom Sepez | 40fbf43e | 2022-11-15 00:11:03 | [diff] [blame] | 71 | hostname_component.is_empty()) { |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 72 | return ProxyServer(); |
| 73 | } |
| 74 | |
| 75 | base::StringPiece hostname = |
| 76 | host_and_port.substr(hostname_component.begin, hostname_component.len); |
| 77 | |
| 78 | // Reject inputs like "foo:". /url parsing and canonicalization code generally |
| 79 | // allows it and treats it the same as a URL without a specified port, but |
| 80 | // Chrome has traditionally disallowed it in proxy specifications. |
Tom Sepez | 40fbf43e | 2022-11-15 00:11:03 | [diff] [blame] | 81 | if (port_component.is_valid() && port_component.is_empty()) |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 82 | return ProxyServer(); |
| 83 | base::StringPiece port = |
| 84 | port_component.is_nonempty() |
| 85 | ? host_and_port.substr(port_component.begin, port_component.len) |
| 86 | : ""; |
| 87 | |
| 88 | return ProxyServer::FromSchemeHostAndPort(scheme, hostname, port); |
| 89 | } |
| 90 | |
| 91 | std::string ConstructHostPortString(base::StringPiece hostname, uint16_t port) { |
| 92 | DCHECK(!hostname.empty()); |
| 93 | DCHECK((hostname.front() == '[' && hostname.back() == ']') || |
| 94 | hostname.find(":") == base::StringPiece::npos); |
| 95 | |
| 96 | return base::StrCat({hostname, ":", base::NumberToString(port)}); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | } // namespace |
| 100 | |
| 101 | ProxyServer PacResultElementToProxyServer( |
| 102 | base::StringPiece pac_result_element) { |
| 103 | // Trim the leading/trailing whitespace. |
| 104 | pac_result_element = HttpUtil::TrimLWS(pac_result_element); |
| 105 | |
| 106 | // Input should match: |
| 107 | // "DIRECT" | ( <type> 1*(LWS) <host-and-port> ) |
| 108 | |
| 109 | // Start by finding the first space (if any). |
| 110 | size_t space = 0; |
| 111 | for (; space < pac_result_element.size(); space++) { |
| 112 | if (HttpUtil::IsLWS(pac_result_element[space])) { |
| 113 | break; |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | // Everything to the left of the space is the scheme. |
| 118 | ProxyServer::Scheme scheme = |
| 119 | GetSchemeFromPacTypeInternal(pac_result_element.substr(0, space)); |
| 120 | |
| 121 | // And everything to the right of the space is the |
| 122 | // <host>[":" <port>]. |
| 123 | return FromSchemeHostAndPort(scheme, pac_result_element.substr(space)); |
| 124 | } |
| 125 | |
| 126 | std::string ProxyServerToPacResultElement(const ProxyServer& proxy_server) { |
| 127 | switch (proxy_server.scheme()) { |
| 128 | case ProxyServer::SCHEME_DIRECT: |
| 129 | return "DIRECT"; |
| 130 | case ProxyServer::SCHEME_HTTP: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 131 | return std::string("PROXY ") + |
| 132 | ConstructHostPortString(proxy_server.GetHost(), |
| 133 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 134 | case ProxyServer::SCHEME_SOCKS4: |
| 135 | // For compatibility send SOCKS instead of SOCKS4. |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 136 | return std::string("SOCKS ") + |
| 137 | ConstructHostPortString(proxy_server.GetHost(), |
| 138 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 139 | case ProxyServer::SCHEME_SOCKS5: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 140 | return std::string("SOCKS5 ") + |
| 141 | ConstructHostPortString(proxy_server.GetHost(), |
| 142 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 143 | case ProxyServer::SCHEME_HTTPS: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 144 | return std::string("HTTPS ") + |
| 145 | ConstructHostPortString(proxy_server.GetHost(), |
| 146 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 147 | case ProxyServer::SCHEME_QUIC: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 148 | return std::string("QUIC ") + |
| 149 | ConstructHostPortString(proxy_server.GetHost(), |
| 150 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 151 | default: |
| 152 | // Got called with an invalid scheme. |
| 153 | NOTREACHED(); |
| 154 | return std::string(); |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | ProxyServer ProxyUriToProxyServer(base::StringPiece uri, |
| 159 | ProxyServer::Scheme default_scheme) { |
| 160 | // We will default to |default_scheme| if no scheme specifier was given. |
| 161 | ProxyServer::Scheme scheme = default_scheme; |
| 162 | |
| 163 | // Trim the leading/trailing whitespace. |
| 164 | uri = HttpUtil::TrimLWS(uri); |
| 165 | |
| 166 | // Check for [<scheme> "://"] |
| 167 | size_t colon = uri.find(':'); |
| 168 | if (colon != base::StringPiece::npos && uri.size() - colon >= 3 && |
| 169 | uri[colon + 1] == '/' && uri[colon + 2] == '/') { |
| 170 | scheme = GetSchemeFromUriScheme(uri.substr(0, colon)); |
| 171 | uri = uri.substr(colon + 3); // Skip past the "://" |
| 172 | } |
| 173 | |
| 174 | // Now parse the <host>[":"<port>]. |
| 175 | return FromSchemeHostAndPort(scheme, uri); |
| 176 | } |
| 177 | |
| 178 | std::string ProxyServerToProxyUri(const ProxyServer& proxy_server) { |
| 179 | switch (proxy_server.scheme()) { |
| 180 | case ProxyServer::SCHEME_DIRECT: |
| 181 | return "direct://"; |
| 182 | case ProxyServer::SCHEME_HTTP: |
| 183 | // Leave off "http://" since it is our default scheme. |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 184 | return ConstructHostPortString(proxy_server.GetHost(), |
| 185 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 186 | case ProxyServer::SCHEME_SOCKS4: |
| 187 | return std::string("socks4://") + |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 188 | ConstructHostPortString(proxy_server.GetHost(), |
| 189 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 190 | case ProxyServer::SCHEME_SOCKS5: |
| 191 | return std::string("socks5://") + |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 192 | ConstructHostPortString(proxy_server.GetHost(), |
| 193 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 194 | case ProxyServer::SCHEME_HTTPS: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 195 | return std::string("https://") + |
| 196 | ConstructHostPortString(proxy_server.GetHost(), |
| 197 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 198 | case ProxyServer::SCHEME_QUIC: |
Eric Orth | 5dbd459 | 2021-09-24 21:21:24 | [diff] [blame] | 199 | return std::string("quic://") + |
| 200 | ConstructHostPortString(proxy_server.GetHost(), |
| 201 | proxy_server.GetPort()); |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 202 | default: |
| 203 | // Got called with an invalid scheme. |
| 204 | NOTREACHED(); |
| 205 | return std::string(); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | ProxyServer::Scheme GetSchemeFromUriScheme(base::StringPiece scheme) { |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 210 | if (base::EqualsCaseInsensitiveASCII(scheme, "http")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 211 | return ProxyServer::SCHEME_HTTP; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 212 | if (base::EqualsCaseInsensitiveASCII(scheme, "socks4")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 213 | return ProxyServer::SCHEME_SOCKS4; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 214 | if (base::EqualsCaseInsensitiveASCII(scheme, "socks")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 215 | return ProxyServer::SCHEME_SOCKS5; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 216 | if (base::EqualsCaseInsensitiveASCII(scheme, "socks5")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 217 | return ProxyServer::SCHEME_SOCKS5; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 218 | if (base::EqualsCaseInsensitiveASCII(scheme, "direct")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 219 | return ProxyServer::SCHEME_DIRECT; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 220 | if (base::EqualsCaseInsensitiveASCII(scheme, "https")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 221 | return ProxyServer::SCHEME_HTTPS; |
Dan McArdle | 841e11d | 2022-05-27 22:00:56 | [diff] [blame] | 222 | if (base::EqualsCaseInsensitiveASCII(scheme, "quic")) |
Eric Orth | 5ccc3f0 | 2021-09-23 00:01:57 | [diff] [blame] | 223 | return ProxyServer::SCHEME_QUIC; |
| 224 | return ProxyServer::SCHEME_INVALID; |
| 225 | } |
| 226 | |
| 227 | } // namespace net |