Blame - components/link_header_util/link_header_util.cc - chromium/src.git

blob: 1438a22c404c044fdac832c5420b06d2196b8925 [file] [log] [blame]

mek	9b28f42	2016-05-13 21:46:23	[diff] [blame]	1	// Copyright 2016 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "components/link_header_util/link_header_util.h"
				6
				7	#include "base/strings/string_util.h"
				8	#include "net/http/http_util.h"
				9
				10	namespace link_header_util {
				11
				12	namespace {
				13
				14	// A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator.
				15	// Takes the parsing of StringTokenizer and adds support for quoted strings that
				16	// are quoted by matching <> (and does not support escaping in those strings).
				17	// Also has the behavior of ValuesIterator where it strips whitespace from all
				18	// values and only outputs non-empty values.
				19	// Only supports ',' as separator and supports "" and <> as quote chars.
				20	class ValueTokenizer {
				21	public:
				22	ValueTokenizer(std::string::const_iterator begin,
				23	std::string::const_iterator end)
				24	: token_begin_(begin), token_end_(begin), end_(end) {}
				25
				26	std::string::const_iterator token_begin() const { return token_begin_; }
				27	std::string::const_iterator token_end() const { return token_end_; }
				28
				29	bool GetNext() {
				30	while (GetNextInternal()) {
				31	net::HttpUtil::TrimLWS(&token_begin_, &token_end_);
				32
				33	// Only return non-empty values.
				34	if (token_begin_ != token_end_)
				35	return true;
				36	}
				37	return false;
				38	}
				39
				40	private:
				41	// Updates token_begin_ and token_end_ to point to the (possibly empty) next
				42	// token. Returns false if end-of-string was reached first.
				43	bool GetNextInternal() {
				44	// First time this is called token_end_ points to the first character in the
				45	// input. Every other time token_end_ points to the delimiter at the end of
				46	// the last returned token (which could be the end of the string).
				47
				48	// End of string, return false.
				49	if (token_end_ == end_)
				50	return false;
				51
				52	// Skip past the delimiter.
				53	if (*token_end_ == ',')
				54	++token_end_;
				55
				56	// Make token_begin_ point to the beginning of the next token, and search
				57	// for the end of the token in token_end_.
				58	token_begin_ = token_end_;
				59
				60	// Set to true if we're currently inside a quoted string.
				61	bool in_quote = false;
				62	// Set to true if we're currently inside a quoted string, and have just
				63	// encountered an escape character. In this case a closing quote will be
				64	// ignored.
				65	bool in_escape = false;
				66	// If currently in a quoted string, this is the character that (when not
				67	// escaped) indicates the end of the string.
				68	char quote_close_char = '\0';
				69	// If currently in a quoted string, this is set to true if it is possible to
				70	// escape the closing quote using '\'.
				71	bool quote_allows_escape = false;
				72
				73	while (token_end_ != end_) {
				74	char c = *token_end_;
				75	if (in_quote) {
				76	if (in_escape) {
				77	in_escape = false;
				78	} else if (quote_allows_escape && c == '\\') {
				79	in_escape = true;
				80	} else if (c == quote_close_char) {
				81	in_quote = false;
				82	}
				83	} else {
				84	if (c == ',')
				85	break;
				86	if (c == '"' \|\| c == '<') {
				87	in_quote = true;
				88	quote_close_char = (c == '<' ? '>' : c);
				89	quote_allows_escape = (c != '<');
				90	}
				91	}
				92	++token_end_;
				93	}
				94	return true;
				95	}
				96
				97	std::string::const_iterator token_begin_;
				98	std::string::const_iterator token_end_;
				99	std::string::const_iterator end_;
				100	};
				101
				102	// Parses the URL part of a Link header. When successful \|url_begin\| points
				103	// to the beginning of the url, \|url_end\| points to the end of the url and
				104	// \|params_begin\| points to the first character after the '>' character at the
				105	// end of the url.
				106	bool ExtractURL(std::string::const_iterator begin,
				107	std::string::const_iterator end,
				108	std::string::const_iterator* url_begin,
				109	std::string::const_iterator* url_end,
				110	std::string::const_iterator* params_begin) {
				111	// Extract the URL part (everything between '<' and first '>' character).
				112	if (*begin != '<')
				113	return false;
				114
				115	++begin;
				116	*url_begin = begin;
				117	*url_end = std::find(begin, end, '>');
				118
				119	// Fail if we did not find a '>'.
				120	if (*url_end == end)
				121	return false;
				122
				123	params_begin = url_end;
				124	// Skip the '>' at the end of the URL.
				125	++*params_begin;
				126
				127	// Trim whitespace from the URL.
				128	net::HttpUtil::TrimLWS(url_begin, url_end);
				129	return true;
				130	}
				131
				132	} // namespace
				133
				134	std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) {
				135	std::vector<StringIteratorPair> values;
				136	ValueTokenizer tokenizer(header.begin(), header.end());
				137	while (tokenizer.GetNext()) {
				138	values.push_back(
				139	StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end()));
				140	}
				141	return values;
				142	}
				143
				144	// Parses one link in a link header into its url and parameters.
				145	// A link is of the form "<some-url>; param1=value1; param2=value2".
				146	// Returns false if parsing the link failed, returns true on success. This
				147	// method is more lenient than the RFC. It doesn't fail on things like invalid
				148	// characters in the URL, and also doesn't verify that certain parameters should
				149	// or shouldn't be quoted strings.
				150	// If a parameter occurs more than once in the link, only the first value is
				151	// returned in params as this is the required behavior for all attributes chrome
				152	// currently cares about in link headers.
				153	bool ParseLinkHeaderValue(
				154	std::string::const_iterator begin,
				155	std::string::const_iterator end,
				156	std::string* url,
				157	std::unordered_map<std::string, base::Optional<std::string>>* params) {
				158	// Can't parse an empty string.
				159	if (begin == end)
				160	return false;
				161
				162	// Extract the URL part (everything between '<' and first '>' character).
				163	std::string::const_iterator url_begin;
				164	std::string::const_iterator url_end;
				165	if (!ExtractURL(begin, end, &url_begin, &url_end, &begin))
				166	return false;
				167	*url = std::string(url_begin, url_end);
				168
				169	// Trim any remaining whitespace, and make sure there is a ';' separating
				170	// parameters from the URL.
				171	net::HttpUtil::TrimLWS(&begin, &end);
				172	if (begin != end && *begin != ';')
				173	return false;
				174
				175	// Parse all the parameters.
				176	net::HttpUtil::NameValuePairsIterator params_iterator(
				177	begin, end, ';',
				178	net::HttpUtil::NameValuePairsIterator::Values::NOT_REQUIRED,
				179	net::HttpUtil::NameValuePairsIterator::Quotes::STRICT_QUOTES);
				180	while (params_iterator.GetNext()) {
				181	if (!net::HttpUtil::IsParmName(params_iterator.name_begin(),
				182	params_iterator.name_end()))
				183	return false;
				184	std::string name = base::ToLowerASCII(base::StringPiece(
				185	params_iterator.name_begin(), params_iterator.name_end()));
				186	if (!params_iterator.value_is_quoted() &&
				187	params_iterator.value_begin() == params_iterator.value_end())
				188	params->insert(std::make_pair(name, base::nullopt));
				189	else
				190	params->insert(std::make_pair(name, params_iterator.value()));
				191	}
				192	return params_iterator.valid();
				193	}
				194
				195	} // namespace link_header_util