blob: 52c8bf82b8db14a33fca0b16c737cd1fb9a78909 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/base/net_util.h"
#include <errno.h>
#include <algorithm>
#include <limits>
#include <string>
#include "build/build_config.h"
#if defined(OS_WIN)
#include <windows.h>
#include <iphlpapi.h>
#include <winsock2.h>
#pragma comment(lib, "iphlpapi.lib")
#elif defined(OS_POSIX)
#include <fcntl.h>
#include <netdb.h>
#include <unistd.h>
#endif // defined(OS_POSIX)
#include "base/json/string_escape.h"
#include "base/logging.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/sys_byteorder.h"
#include "base/values.h"
#include "net/base/address_list.h"
#include "net/base/escape.h"
#include "net/base/ip_address_number.h"
#include "net/base/net_module.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "net/base/url_util.h"
#include "net/grit/net_resources.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_ip.h"
#if defined(OS_WIN)
#include "net/base/winsock_init.h"
#endif
namespace net {
namespace {
std::string NormalizeHostname(base::StringPiece host) {
std::string result = base::ToLowerASCII(host);
if (!result.empty() && *result.rbegin() == '.')
result.resize(result.size() - 1);
return result;
}
bool IsNormalizedLocalhostTLD(const std::string& host) {
return base::EndsWith(host, ".localhost", base::CompareCase::SENSITIVE);
}
// |host| should be normalized.
bool IsLocalHostname(const std::string& host) {
return host == "localhost" || host == "localhost.localdomain" ||
IsNormalizedLocalhostTLD(host);
}
// |host| should be normalized.
bool IsLocal6Hostname(const std::string& host) {
return host == "localhost6" || host == "localhost6.localdomain6";
}
} // namespace
std::string CanonicalizeHost(const std::string& host,
url::CanonHostInfo* host_info) {
// Try to canonicalize the host.
const url::Component raw_host_component(0, static_cast<int>(host.length()));
std::string canon_host;
url::StdStringCanonOutput canon_host_output(&canon_host);
url::CanonicalizeHostVerbose(host.c_str(), raw_host_component,
&canon_host_output, host_info);
if (host_info->out_host.is_nonempty() &&
host_info->family != url::CanonHostInfo::BROKEN) {
// Success! Assert that there's no extra garbage.
canon_host_output.Complete();
DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));
} else {
// Empty host, or canonicalization failed. We'll return empty.
canon_host.clear();
}
return canon_host;
}
std::string GetDirectoryListingHeader(const base::string16& title) {
static const base::StringPiece header(
NetModule::GetResource(IDR_DIR_HEADER_HTML));
// This can be null in unit tests.
DLOG_IF(WARNING, header.empty()) <<
"Missing resource: directory listing header";
std::string result;
if (!header.empty())
result.assign(header.data(), header.size());
result.append("<script>start(");
base::EscapeJSONString(title, true, &result);
result.append(");</script>\n");
return result;
}
inline bool IsHostCharAlphanumeric(char c) {
// We can just check lowercase because uppercase characters have already been
// normalized.
return ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9'));
}
bool IsCanonicalizedHostCompliant(const std::string& host) {
if (host.empty())
return false;
bool in_component = false;
bool most_recent_component_started_alphanumeric = false;
for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
const char c = *i;
if (!in_component) {
most_recent_component_started_alphanumeric = IsHostCharAlphanumeric(c);
if (!most_recent_component_started_alphanumeric && (c != '-') &&
(c != '_')) {
return false;
}
in_component = true;
} else if (c == '.') {
in_component = false;
} else if (!IsHostCharAlphanumeric(c) && (c != '-') && (c != '_')) {
return false;
}
}
return most_recent_component_started_alphanumeric;
}
bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
std::string::const_iterator host_and_port_end,
std::string* host,
int* port) {
if (host_and_port_begin >= host_and_port_end)
return false;
// When using url, we use char*.
const char* auth_begin = &(*host_and_port_begin);
int auth_len = host_and_port_end - host_and_port_begin;
url::Component auth_component(0, auth_len);
url::Component username_component;
url::Component password_component;
url::Component hostname_component;
url::Component port_component;
url::ParseAuthority(auth_begin, auth_component, &username_component,
&password_component, &hostname_component, &port_component);
// There shouldn't be a username/password.
if (username_component.is_valid() || password_component.is_valid())
return false;
if (!hostname_component.is_nonempty())
return false; // Failed parsing.
int parsed_port_number = -1;
if (port_component.is_nonempty()) {
parsed_port_number = url::ParsePort(auth_begin, port_component);
// If parsing failed, port_number will be either PORT_INVALID or
// PORT_UNSPECIFIED, both of which are negative.
if (parsed_port_number < 0)
return false; // Failed parsing the port number.
}
if (port_component.len == 0)
return false; // Reject inputs like "foo:"
unsigned char tmp_ipv6_addr[16];
// If the hostname starts with a bracket, it is either an IPv6 literal or
// invalid. If it is an IPv6 literal then strip the brackets.
if (hostname_component.len > 0 &&
auth_begin[hostname_component.begin] == '[') {
if (auth_begin[hostname_component.end() - 1] == ']' &&
url::IPv6AddressToNumber(
auth_begin, hostname_component, tmp_ipv6_addr)) {
// Strip the brackets.
hostname_component.begin++;
hostname_component.len -= 2;
} else {
return false;
}
}
// Pass results back to caller.
host->assign(auth_begin + hostname_component.begin, hostname_component.len);
*port = parsed_port_number;
return true; // Success.
}
bool ParseHostAndPort(const std::string& host_and_port,
std::string* host,
int* port) {
return ParseHostAndPort(
host_and_port.begin(), host_and_port.end(), host, port);
}
std::string GetHostAndPort(const GURL& url) {
// For IPv6 literals, GURL::host() already includes the brackets so it is
// safe to just append a colon.
return base::StringPrintf("%s:%d", url.host().c_str(),
url.EffectiveIntPort());
}
std::string GetHostAndOptionalPort(const GURL& url) {
// For IPv6 literals, GURL::host() already includes the brackets
// so it is safe to just append a colon.
if (url.has_port())
return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str());
return url.host();
}
bool IsHostnameNonUnique(const std::string& hostname) {
// CanonicalizeHost requires surrounding brackets to parse an IPv6 address.
const std::string host_or_ip = hostname.find(':') != std::string::npos ?
"[" + hostname + "]" : hostname;
url::CanonHostInfo host_info;
std::string canonical_name = CanonicalizeHost(host_or_ip, &host_info);
// If canonicalization fails, then the input is truly malformed. However,
// to avoid mis-reporting bad inputs as "non-unique", treat them as unique.
if (canonical_name.empty())
return false;
// If |hostname| is an IP address, check to see if it's in an IANA-reserved
// range.
if (host_info.IsIPAddress()) {
IPAddressNumber host_addr;
if (!ParseIPLiteralToNumber(hostname.substr(host_info.out_host.begin,
host_info.out_host.len),
&host_addr)) {
return false;
}
switch (host_info.family) {
case url::CanonHostInfo::IPV4:
case url::CanonHostInfo::IPV6:
return IsIPAddressReserved(host_addr);
case url::CanonHostInfo::NEUTRAL:
case url::CanonHostInfo::BROKEN:
return false;
}
}
// Check for a registry controlled portion of |hostname|, ignoring private
// registries, as they already chain to ICANN-administered registries,
// and explicitly ignoring unknown registries.
//
// Note: This means that as new gTLDs are introduced on the Internet, they
// will be treated as non-unique until the registry controlled domain list
// is updated. However, because gTLDs are expected to provide significant
// advance notice to deprecate older versions of this code, this an
// acceptable tradeoff.
return 0 == registry_controlled_domains::GetRegistryLength(
canonical_name,
registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
}
SockaddrStorage::SockaddrStorage(const SockaddrStorage& other)
: addr_len(other.addr_len),
addr(reinterpret_cast<struct sockaddr*>(&addr_storage)) {
memcpy(addr, other.addr, addr_len);
}
void SockaddrStorage::operator=(const SockaddrStorage& other) {
addr_len = other.addr_len;
// addr is already set to &this->addr_storage by default ctor.
memcpy(addr, other.addr, addr_len);
}
std::string GetHostName() {
#if defined(OS_NACL)
NOTIMPLEMENTED();
return std::string();
#else // defined(OS_NACL)
#if defined(OS_WIN)
EnsureWinsockInit();
#endif
// Host names are limited to 255 bytes.
char buffer[256];
int result = gethostname(buffer, sizeof(buffer));
if (result != 0) {
DVLOG(1) << "gethostname() failed with " << result;
buffer[0] = '\0';
}
return std::string(buffer);
#endif // !defined(OS_NACL)
}
void GetIdentityFromURL(const GURL& url,
base::string16* username,
base::string16* password) {
UnescapeRule::Type flags =
UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS;
*username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags);
*password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags);
}
std::string GetHostOrSpecFromURL(const GURL& url) {
return url.has_host() ? TrimEndingDot(url.host_piece()) : url.spec();
}
GURL SimplifyUrlForRequest(const GURL& url) {
DCHECK(url.is_valid());
GURL::Replacements replacements;
replacements.ClearUsername();
replacements.ClearPassword();
replacements.ClearRef();
return url.ReplaceComponents(replacements);
}
bool ResolveLocalHostname(base::StringPiece host,
uint16_t port,
AddressList* address_list) {
static const unsigned char kLocalhostIPv4[] = {127, 0, 0, 1};
static const unsigned char kLocalhostIPv6[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
std::string normalized_host = NormalizeHostname(host);
address_list->clear();
bool is_local6 = IsLocal6Hostname(normalized_host);
if (!is_local6 && !IsLocalHostname(normalized_host))
return false;
address_list->push_back(
IPEndPoint(IPAddressNumber(kLocalhostIPv6,
kLocalhostIPv6 + arraysize(kLocalhostIPv6)),
port));
if (!is_local6) {
address_list->push_back(
IPEndPoint(IPAddressNumber(kLocalhostIPv4,
kLocalhostIPv4 + arraysize(kLocalhostIPv4)),
port));
}
return true;
}
bool IsLocalhost(base::StringPiece host) {
std::string normalized_host = NormalizeHostname(host);
if (IsLocalHostname(normalized_host) || IsLocal6Hostname(normalized_host))
return true;
IPAddressNumber ip_number;
if (ParseIPLiteralToNumber(host, &ip_number)) {
size_t size = ip_number.size();
switch (size) {
case kIPv4AddressSize: {
IPAddressNumber localhost_prefix;
localhost_prefix.push_back(127);
for (int i = 0; i < 3; ++i) {
localhost_prefix.push_back(0);
}
return IPNumberMatchesPrefix(ip_number, localhost_prefix, 8);
}
case kIPv6AddressSize: {
struct in6_addr sin6_addr;
memcpy(&sin6_addr, &ip_number[0], kIPv6AddressSize);
return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr);
}
default:
NOTREACHED();
}
}
return false;
}
bool HasGoogleHost(const GURL& url) {
static const char* kGoogleHostSuffixes[] = {
".google.com",
".youtube.com",
".gmail.com",
".doubleclick.net",
".gstatic.com",
".googlevideo.com",
".googleusercontent.com",
".googlesyndication.com",
".google-analytics.com",
".googleadservices.com",
".googleapis.com",
".ytimg.com",
};
base::StringPiece host = url.host_piece();
for (const char* suffix : kGoogleHostSuffixes) {
// Here it's possible to get away with faster case-sensitive comparisons
// because the list above is all lowercase, and a GURL's host name will
// always be canonicalized to lowercase as well.
if (base::EndsWith(host, suffix, base::CompareCase::SENSITIVE))
return true;
}
return false;
}
} // namespace net