components/query_parser/query_parser.cc - chromium/src - Git at Google

 // Copyright 2014 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/query_parser/query_parser.h"

 #include <algorithm>
 #include <memory>
 #include <ostream>

 #include "base/check.h"
 #include "base/compiler_specific.h"
 #include "base/i18n/break_iterator.h"
 #include "base/i18n/case_conversion.h"
 #include "base/notreached.h"
 #include "base/ranges/algorithm.h"
 #include "base/strings/utf_string_conversions.h"

 namespace query_parser {
 namespace {

 // Ordering predicate used to sort match positions: returns true when
 // |mp1.first| is less than |mp2.first|. Only the |first| members are
 // compared. Declared bool because it is used as a comparison predicate.
 bool CompareMatchPosition(const Snippet::MatchPosition& mp1,
                           const Snippet::MatchPosition& mp2) {
   return mp1.first < mp2.first;
 }

 // Reports whether |mp2| begins inside the closed range
 // [mp1.first, mp1.second]. Only the start of |mp2| is examined, so this is a
 // helper for CoalesceMatchesFrom and not a general intersection test.
 bool SnippetIntersects(const Snippet::MatchPosition& mp1,
                        const Snippet::MatchPosition& mp2) {
   const auto& start = mp2.first;
   if (start < mp1.first)
     return false;
   return start <= mp1.second;
 }

 // Merges into the match at |index| every directly following match that
 // intersects it (per SnippetIntersects), growing its end as needed, and
 // removes the merged entries from |matches|. Scanning stops at the first
 // following match that does not intersect. The absorbed run is removed with
 // one range erase so the tail of |matches| is shifted once instead of once
 // per merged entry.
 void CoalesceMatchesFrom(size_t index, Snippet::MatchPositions* matches) {
   Snippet::MatchPosition& mp = (*matches)[index];
   const auto first = matches->begin() + index + 1;
   auto last = first;
   for (; last != matches->end() && SnippetIntersects(mp, *last); ++last)
     mp.second = std::max(mp.second, last->second);
   // Only elements after |index| are erased, so |mp| is still the same entry
   // (assumes |matches| is vector-like -- TODO confirm against Snippet).
   matches->erase(first, last);
 }

 // Tells whether |ch| is one of the characters the query parser accepts as a
 // quotation mark.
 bool IsQueryQuote(wchar_t ch) {
   switch (ch) {
     case '"':     // ASCII quotation mark
     case 0xab:    // left pointing double angle bracket
     case 0xbb:    // right pointing double angle bracket
     case 0x201c:  // left double quotation mark
     case 0x201d:  // right double quotation mark
     case 0x201e:  // double low-9 quotation mark
       return true;
     default:
       return false;
   }
 }

 }  // namespace

 // Inheritance structure:
 // Queries are represented as trees of QueryNodes.
 // QueryNodes are either a collection of subnodes (a QueryNodeList)
 // or a single word (a QueryNodeWord).

 // A QueryNodeWord is a single word in the query. It is a leaf of the query
 // tree: IsWord() returns true for it.
 class QueryNodeWord : public QueryNode {
  public:
   // Stores |word| and |matching_algorithm|; the node starts out non-literal.
   explicit QueryNodeWord(const std::u16string& word,
                          MatchingAlgorithm matching_algorithm);

   QueryNodeWord(const QueryNodeWord&) = delete;
   QueryNodeWord& operator=(const QueryNodeWord&) = delete;

   ~QueryNodeWord() override;

   // The word this node was created with.
   const std::u16string& word() const { return word_; }

   // A literal word never gets the prefix-search '*' appended by
   // AppendToSQLiteQuery(). The parser marks words inside quotes as literal.
   bool literal() const { return literal_; }
   void set_literal(bool literal) { literal_ = literal; }

   // QueryNode:
   int AppendToSQLiteQuery(std::u16string* query) const override;
   bool IsWord() const override;
   bool Matches(const std::u16string& word, bool exact) const override;
   bool HasMatchIn(const QueryWordVector& words,
                   Snippet::MatchPositions* match_positions) const override;
   bool HasMatchIn(const QueryWordVector& words, bool exact) const override;
   void AppendWords(std::vector<std::u16string>* words) const override;

  private:
   // The query word itself.
   std::u16string word_;
   // Whether the word must be used as-is (no prefix search in SQLite queries).
   bool literal_;
   // Passed to QueryParser::IsWordLongEnoughForPrefixSearch() to decide
   // whether prefix matching applies to |word_|.
   const MatchingAlgorithm matching_algorithm_;
 };

 // Creates a non-literal word node holding a copy of |word|.
 QueryNodeWord::QueryNodeWord(const std::u16string& word,
                              MatchingAlgorithm matching_algorithm)
     : word_(word),
       literal_(false),
       matching_algorithm_(matching_algorithm) {
 }

 QueryNodeWord::~QueryNodeWord() = default;

 // Appends |word_| to |query|, followed by '*' when the word is not literal and
 // QueryParser::IsWordLongEnoughForPrefixSearch() accepts it. Always returns
 // 1, which callers add up as the number of words.
 int QueryNodeWord::AppendToSQLiteQuery(std::u16string* query) const {
   query->append(word_);

   // Use prefix search if we're not literal and long enough. The marker is a
   // char16_t literal so it matches the UTF-16 string it is appended to.
   const bool use_prefix_search =
       !literal_ &&
       QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_);
   if (use_prefix_search)
     query->push_back(u'*');
   return 1;
 }

 // A word node is always a word (as opposed to a list of sub-nodes).
 bool QueryNodeWord::IsWord() const {
   return true;
 }

 // Returns whether |word| matches this node's word. The comparison is plain
 // equality when |exact| is set or when |word_| is too short for prefix
 // search; otherwise |word| only has to begin with |word_|.
 bool QueryNodeWord::Matches(const std::u16string& word, bool exact) const {
   const bool prefix_allowed =
       !exact &&
       QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_);
   if (!prefix_allowed)
     return word == word_;
   if (word.size() < word_.size())
     return false;
   return word_.compare(0, word_.size(), word, 0, word_.size()) == 0;
 }

 // Appends to |match_positions| one entry for every element of |words| that
 // this node matches (non-exact matching). Returns true if at least one
 // element matched. Each entry spans from the matched word's position to that
 // position plus the length of |word_|, i.e. only the part covered by the
 // query word.
 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words,
                                Snippet::MatchPositions* match_positions) const {
   bool matched = false;
   for (const auto& query_word : words) {
     if (!Matches(query_word.word, false))
       continue;
     const size_t match_start = query_word.position;
     // The length is added as size_t; narrowing it to int first served no
     // purpose.
     match_positions->push_back(
         Snippet::MatchPosition(match_start, match_start + word_.size()));
     matched = true;
   }
   return matched;
 }

 bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, bool exact) const {
   return base::ranges::any_of(words, [&](const auto& query_word) {
     return Matches(query_word.word, exact);
   });
 }

 // Adds a copy of this node's word to the end of |words|.
 void QueryNodeWord::AppendWords(std::vector<std::u16string>* words) const {
   words->emplace_back(word_);
 }

 // A QueryNodeList has a collection of QueryNodes which are deleted in the end.
 // It is the non-word kind of node: IsWord() returns false for it.
 class QueryNodeList : public QueryNode {
  public:
   QueryNodeList();

   QueryNodeList(const QueryNodeList&) = delete;
   QueryNodeList& operator=(const QueryNodeList&) = delete;

   ~QueryNodeList() override;

   // Gives mutable access to the child vector.
   QueryNodeVector* children() { return &children_; }

   // Appends |node| as the last child, taking ownership of it.
   void AddChild(std::unique_ptr<QueryNode> node);

   // Remove empty subnodes left over from other parsing.
   void RemoveEmptySubnodes();

   // QueryNode:
   int AppendToSQLiteQuery(std::u16string* query) const override;
   bool IsWord() const override;
   // Matches() and both HasMatchIn() overloads are not supported on a plain
   // list: they hit NOTREACHED() and return false.
   bool Matches(const std::u16string& word, bool exact) const override;
   bool HasMatchIn(const QueryWordVector& words,
                   Snippet::MatchPositions* match_positions) const override;
   bool HasMatchIn(const QueryWordVector& words, bool exact) const override;
   void AppendWords(std::vector<std::u16string>* words) const override;

  protected:
   // Appends every child's SQLite form to |query|, separated by single spaces,
   // and returns the sum of the children's return values.
   int AppendChildrenToString(std::u16string* query) const;

   // The child nodes, in insertion order.
   QueryNodeVector children_;
 };

 // Nothing to set up or tear down explicitly: |children_| manages itself.
 QueryNodeList::QueryNodeList() = default;

 QueryNodeList::~QueryNodeList() = default;

 // Takes ownership of |node| and stores it as the last child.
 void QueryNodeList::AddChild(std::unique_ptr<QueryNode> node) {
   children_.push_back(std::move(node));
 }

 void QueryNodeList::RemoveEmptySubnodes() {
   QueryNodeVector kept_children;
   for (size_t i = 0; i < children_.size(); ++i) {
     if (children_[i]->IsWord()) {
       kept_children.push_back(std::move(children_[i]));
       continue;
     }

     QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i].get());
     list_node->RemoveEmptySubnodes();
     if (!list_node->children()->empty())
       kept_children.push_back(std::move(children_[i]));
   }
   children_.swap(kept_children);
 }

 // A plain list is rendered as its children joined by spaces; returns the
 // word count reported by AppendChildrenToString().
 int QueryNodeList::AppendToSQLiteQuery(std::u16string* query) const {
   return AppendChildrenToString(query);
 }

 // A list is never a word.
 bool QueryNodeList::IsWord() const {
   return false;
 }

 // Not supported on a list node: hits NOTREACHED() and returns false.
 bool QueryNodeList::Matches(const std::u16string& word, bool exact) const {
   NOTREACHED();
   return false;
 }

 // Not supported on a list node: hits NOTREACHED() and returns false.
 // |match_positions| is left untouched.
 bool QueryNodeList::HasMatchIn(const QueryWordVector& words,
                                Snippet::MatchPositions* match_positions) const {
   NOTREACHED();
   return false;
 }

 // Not supported on a list node: hits NOTREACHED() and returns false.
 bool QueryNodeList::HasMatchIn(const QueryWordVector& words, bool exact) const {
   NOTREACHED();
   return false;
 }

 // Asks every child, in order, to append its words to |words|.
 void QueryNodeList::AppendWords(std::vector<std::u16string>* words) const {
   for (const auto& child : children_)
     child->AppendWords(words);
 }

 // Appends the SQLite form of every child to |query|, inserting one space
 // between consecutive children. Returns the sum of the values returned by
 // the children's AppendToSQLiteQuery() calls.
 int QueryNodeList::AppendChildrenToString(std::u16string* query) const {
   int num_words = 0;
   bool first = true;
   for (const auto& child : children_) {
     // The separator is a char16_t literal to match the UTF-16 string.
     if (!first)
       query->push_back(u' ');
     first = false;
     num_words += child->AppendToSQLiteQuery(query);
   }
   return num_words;
 }

 // A QueryNodePhrase is a phrase query ("quoted"). Its children must match
 // consecutive words, in order, for the phrase to match.
 class QueryNodePhrase : public QueryNodeList {
  public:
   QueryNodePhrase();

   QueryNodePhrase(const QueryNodePhrase&) = delete;
   QueryNodePhrase& operator=(const QueryNodePhrase&) = delete;

   ~QueryNodePhrase() override;

   // QueryNodeList:
   int AppendToSQLiteQuery(std::u16string* query) const override;
   bool HasMatchIn(const QueryWordVector& words,
                   Snippet::MatchPositions* match_positions) const override;
   bool HasMatchIn(const QueryWordVector& words, bool exact) const override;

  private:
   // Looks for the first run of consecutive entries in |words| that the
   // children match exactly, in order. On success sets |*first_word| and
   // |*last_word| to the first and last entry of that run and returns true.
   bool MatchesAll(const QueryWordVector& words,
                   const QueryWord** first_word,
                   const QueryWord** last_word) const;
 };

 // The phrase node adds no state of its own to QueryNodeList.
 QueryNodePhrase::QueryNodePhrase() = default;

 QueryNodePhrase::~QueryNodePhrase() = default;

 // Appends the children to |query| wrapped in a pair of '"' characters and
 // returns the word count reported by AppendChildrenToString(). The quotes
 // are char16_t literals to match the UTF-16 string they are appended to.
 int QueryNodePhrase::AppendToSQLiteQuery(std::u16string* query) const {
   query->push_back(u'"');
   const int num_words = AppendChildrenToString(query);
   query->push_back(u'"');
   return num_words;
 }

 // Searches |words| for the first run of consecutive entries that the
 // children match exactly, child j against the j-th entry of the run. On
 // success stores pointers to the first and last entry of the run in
 // |*first_word| / |*last_word| and returns true. Otherwise returns false and
 // leaves both out-parameters untouched.
 bool QueryNodePhrase::MatchesAll(const QueryWordVector& words,
                                  const QueryWord** first_word,
                                  const QueryWord** last_word) const {
   const size_t phrase_length = children_.size();
   // A phrase without children has no first/last word to report; without this
   // guard the code below would index |words| at position -1.
   if (phrase_length == 0 || words.size() < phrase_length)
     return false;

   const size_t last_start = words.size() - phrase_length;
   for (size_t start = 0; start <= last_start; ++start) {
     // Count how many leading children match at this starting offset.
     size_t matched = 0;
     while (matched < phrase_length &&
            children_[matched]->Matches(words[start + matched].word, true)) {
       ++matched;
     }
     if (matched == phrase_length) {
       *first_word = &words[start];
       *last_word = &words[start + phrase_length - 1];
       return true;
     }
   }
   return false;
 }

 // If the phrase occurs in |words|, appends one match position spanning from
 // the start of its first word to the end of its last word and returns true.
 // Otherwise returns false without touching |match_positions|.
 bool QueryNodePhrase::HasMatchIn(
     const QueryWordVector& words,
     Snippet::MatchPositions* match_positions) const {
   const QueryWord* first_word = nullptr;
   const QueryWord* last_word = nullptr;
   if (!MatchesAll(words, &first_word, &last_word))
     return false;

   const auto phrase_begin = first_word->position;
   const auto phrase_end = last_word->position + last_word->word.length();
   match_positions->push_back(Snippet::MatchPosition(phrase_begin, phrase_end));
   return true;
 }

 bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words,
                                  bool exact) const {
   const QueryWord* first_word;
   const QueryWord* last_word;
   return MatchesAll(words, &first_word, &last_word);
 }

 // static
 // Decides whether |word| qualifies for prefix search. Always true under
 // ALWAYS_PREFIX_SEARCH. Otherwise the word needs at least 3 code units, or
 // at least 2 when its first code unit lies in 0xAC00..0xD7A3.
 bool QueryParser::IsWordLongEnoughForPrefixSearch(
     const std::u16string& word,
     MatchingAlgorithm matching_algorithm) {
   if (matching_algorithm == MatchingAlgorithm::ALWAYS_PREFIX_SEARCH)
     return true;

   DCHECK(!word.empty());
   // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
   // because they 'behave like' Latin letters. Moreover, we should
   // normalize the former before reaching here.
   const bool shorter_minimum_applies = 0xAC00 <= word[0] && word[0] <= 0xD7A3;
   const size_t minimum_length = shorter_minimum_applies ? 2 : 3;
   return word.size() >= minimum_length;
 }

 // static
 // Parses |query| and appends its SQLite form to |sqlite_query|. Returns the
 // value reported by the root node's AppendToSQLiteQuery(), or 0 (leaving
 // |sqlite_query| untouched) when parsing fails.
 int QueryParser::ParseQuery(const std::u16string& query,
                             MatchingAlgorithm matching_algorithm,
                             std::u16string* sqlite_query) {
   QueryNodeList root;
   const bool parsed = ParseQueryImpl(query, matching_algorithm, &root);
   return parsed ? root.AppendToSQLiteQuery(sqlite_query) : 0;
 }

 // static
 // Parses |query| and appends every word of the resulting tree to |words|.
 // Nothing is appended when parsing fails.
 void QueryParser::ParseQueryWords(const std::u16string& query,
                                   MatchingAlgorithm matching_algorithm,
                                   std::vector<std::u16string>* words) {
   QueryNodeList root;
   if (ParseQueryImpl(query, matching_algorithm, &root))
     root.AppendWords(words);
 }

 // static
 // Parses the lowercased form of |query| and, on success, swaps the parsed
 // top-level nodes into |nodes|. |nodes| is left untouched on failure.
 void QueryParser::ParseQueryNodes(const std::u16string& query,
                                   MatchingAlgorithm matching_algorithm,
                                   QueryNodeVector* nodes) {
   QueryNodeList root;
   const std::u16string lowered_query = base::i18n::ToLower(query);
   if (!ParseQueryImpl(lowered_query, matching_algorithm, &root))
     return;
   nodes->swap(*root.children());
 }

 // static
 // Returns true when every node in |find_nodes| has a match among the words
 // of the lowercased |find_in_text|. On success |match_positions| receives
 // the sorted, coalesced match positions, or is cleared when lowercasing
 // changed the text length. Returns false, leaving |match_positions|
 // untouched, when |find_nodes| is empty, the text has no words, or some
 // node does not match.
 bool QueryParser::DoesQueryMatch(const std::u16string& find_in_text,
                                  const QueryNodeVector& find_nodes,
                                  Snippet::MatchPositions* match_positions) {
   if (find_nodes.empty())
     return false;

   const std::u16string lower_find_in_text = base::i18n::ToLower(find_in_text);
   QueryWordVector query_words;
   ExtractQueryWords(lower_find_in_text, &query_words);
   if (query_words.empty())
     return false;

   Snippet::MatchPositions matches;
   for (const auto& find_node : find_nodes) {
     if (!find_node->HasMatchIn(query_words, &matches))
       return false;
   }

   const bool positions_usable =
       lower_find_in_text.length() == find_in_text.length();
   if (positions_usable) {
     SortAndCoalesceMatchPositions(&matches);
     match_positions->swap(matches);
   } else {
     // The lower case string differs from the original string. The matches are
     // meaningless.
     // TODO(sky): we need a better way to align the positions so that we don't
     // completely punt here.
     match_positions->clear();
   }
   return true;
 }

 // static
 bool QueryParser::DoesQueryMatch(const QueryWordVector& find_in_words,
                                  const QueryNodeVector& find_nodes,
                                  bool exact) {
   if (find_nodes.empty() || find_in_words.empty())
     return false;
   return base::ranges::all_of(find_nodes, [&](const auto& find_node) {
     return find_node->HasMatchIn(find_in_words, exact);
   });
 }

 // static
 bool QueryParser::ParseQueryImpl(const std::u16string& query,
                                  MatchingAlgorithm matching_algorithm,
                                  QueryNodeList* root) {
   base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
   // TODO(evanm): support a locale here
   if (!iter.Init())
     return false;

   // To handle nesting, we maintain a stack of QueryNodeLists.
   // The last element (back) of the stack contains the current, deepest node.
   std::vector<QueryNodeList*> query_stack;
   query_stack.push_back(root);

   bool in_quotes = false;  // whether we're currently in a quoted phrase
   while (iter.Advance()) {
     // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
     // is not necessarily a word, but could also be a sequence of punctuation
     // or whitespace.
     if (iter.IsWord()) {
       std::unique_ptr<QueryNodeWord> word_node =
           std::make_unique<QueryNodeWord>(iter.GetString(), matching_algorithm);
       if (in_quotes)
         word_node->set_literal(true);
       query_stack.back()->AddChild(std::move(word_node));
     } else {  // Punctuation.
       if (IsQueryQuote(query[iter.prev()])) {
         if (!in_quotes) {
           std::unique_ptr<QueryNodeList> quotes_node =
               std::make_unique<QueryNodePhrase>();
           QueryNodeList* quotes_node_ptr = quotes_node.get();
           query_stack.back()->AddChild(std::move(quotes_node));
           query_stack.push_back(quotes_node_ptr);
           in_quotes = true;
         } else {
           query_stack.pop_back();  // Stop adding to the quoted phrase.
           in_quotes = false;
         }
       }
     }
   }

   root->RemoveEmptySubnodes();
   return true;
 }

 // static
 // Splits the already-lowercased |text| with a word break iterator and
 // appends one QueryWord, holding the word and its start offset in |text|,
 // for every non-empty word span. Appends nothing if the iterator fails to
 // initialize.
 void QueryParser::ExtractQueryWords(const std::u16string& text,
                                     QueryWordVector* words) {
   DCHECK(text == base::i18n::ToLower(text))
       << "The caller must have already lowercased `text`. Value = "
       << base::UTF16ToUTF8(text);
   base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
   // TODO(evanm): support a locale here
   if (!iter.Init())
     return;

   while (iter.Advance()) {
     // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
     // is not necessarily a word, but could also be a sequence of punctuation
     // or whitespace.
     if (!iter.IsWord())
       continue;
     std::u16string word = iter.GetString();
     if (word.empty())
       continue;
     words->push_back(QueryWord());
     auto& entry = words->back();
     entry.word = word;
     entry.position = iter.prev();
   }
 }

 // static
 // Sorts |matches| by start position, then merges each entry with the
 // intersecting entries that follow it.
 void QueryParser::SortAndCoalesceMatchPositions(
     Snippet::MatchPositions* matches) {
   std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
   // WARNING: we index rather than iterate because CoalesceMatchesFrom may
   // remove entries from |matches|; the size is re-read on every pass.
   size_t i = 0;
   while (i < matches->size()) {
     CoalesceMatchesFrom(i, matches);
     ++i;
   }
 }

 }  // namespace query_parser
	// Copyright 2014 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "components/query_parser/query_parser.h"

	#include <algorithm>
	#include <memory>
	#include <ostream>

	#include "base/check.h"
	#include "base/compiler_specific.h"
	#include "base/i18n/break_iterator.h"
	#include "base/i18n/case_conversion.h"
	#include "base/notreached.h"
	#include "base/ranges/algorithm.h"
	#include "base/strings/utf_string_conversions.h"

	namespace query_parser {
	namespace {

	// Ordering predicate used to sort match positions: returns true when
	// |mp1.first| is less than |mp2.first|. Only the |first| members are
	// compared. Declared bool because it is used as a comparison predicate.
	bool CompareMatchPosition(const Snippet::MatchPosition& mp1,
	                          const Snippet::MatchPosition& mp2) {
	  return mp1.first < mp2.first;
	}

	// Reports whether |mp2| begins inside the closed range
	// [mp1.first, mp1.second]. Only the start of |mp2| is examined, so this is a
	// helper for CoalesceMatchesFrom and not a general intersection test.
	bool SnippetIntersects(const Snippet::MatchPosition& mp1,
	                       const Snippet::MatchPosition& mp2) {
	  const auto& start = mp2.first;
	  if (start < mp1.first)
	    return false;
	  return start <= mp1.second;
	}

	// Merges into the match at |index| every directly following match that
	// intersects it (per SnippetIntersects), growing its end as needed, and
	// removes the merged entries from |matches|. Scanning stops at the first
	// following match that does not intersect. The absorbed run is removed with
	// one range erase so the tail of |matches| is shifted once instead of once
	// per merged entry.
	void CoalesceMatchesFrom(size_t index, Snippet::MatchPositions* matches) {
	  Snippet::MatchPosition& mp = (*matches)[index];
	  const auto first = matches->begin() + index + 1;
	  auto last = first;
	  for (; last != matches->end() && SnippetIntersects(mp, *last); ++last)
	    mp.second = std::max(mp.second, last->second);
	  // Only elements after |index| are erased, so |mp| is still the same entry
	  // (assumes |matches| is vector-like -- TODO confirm against Snippet).
	  matches->erase(first, last);
	}

	// Tells whether |ch| is one of the characters the query parser accepts as a
	// quotation mark.
	bool IsQueryQuote(wchar_t ch) {
	  switch (ch) {
	    case '"':     // ASCII quotation mark
	    case 0xab:    // left pointing double angle bracket
	    case 0xbb:    // right pointing double angle bracket
	    case 0x201c:  // left double quotation mark
	    case 0x201d:  // right double quotation mark
	    case 0x201e:  // double low-9 quotation mark
	      return true;
	    default:
	      return false;
	  }
	}

	} // namespace

	// Inheritance structure:
	// Queries are represented as trees of QueryNodes.
	// QueryNodes are either a collection of subnodes (a QueryNodeList)
	// or a single word (a QueryNodeWord).

	// A QueryNodeWord is a single word in the query. It is a leaf of the query
	// tree: IsWord() returns true for it.
	class QueryNodeWord : public QueryNode {
	 public:
	  // Stores |word| and |matching_algorithm|; the node starts out non-literal.
	  explicit QueryNodeWord(const std::u16string& word,
	                         MatchingAlgorithm matching_algorithm);

	  QueryNodeWord(const QueryNodeWord&) = delete;
	  QueryNodeWord& operator=(const QueryNodeWord&) = delete;

	  ~QueryNodeWord() override;

	  // The word this node was created with.
	  const std::u16string& word() const { return word_; }

	  // A literal word never gets the prefix-search '*' appended by
	  // AppendToSQLiteQuery(). The parser marks words inside quotes as literal.
	  bool literal() const { return literal_; }
	  void set_literal(bool literal) { literal_ = literal; }

	  // QueryNode:
	  int AppendToSQLiteQuery(std::u16string* query) const override;
	  bool IsWord() const override;
	  bool Matches(const std::u16string& word, bool exact) const override;
	  bool HasMatchIn(const QueryWordVector& words,
	                  Snippet::MatchPositions* match_positions) const override;
	  bool HasMatchIn(const QueryWordVector& words, bool exact) const override;
	  void AppendWords(std::vector<std::u16string>* words) const override;

	 private:
	  // The query word itself.
	  std::u16string word_;
	  // Whether the word must be used as-is (no prefix search in SQLite queries).
	  bool literal_;
	  // Passed to QueryParser::IsWordLongEnoughForPrefixSearch() to decide
	  // whether prefix matching applies to |word_|.
	  const MatchingAlgorithm matching_algorithm_;
	};

	// Creates a non-literal word node holding a copy of |word|.
	QueryNodeWord::QueryNodeWord(const std::u16string& word,
	                             MatchingAlgorithm matching_algorithm)
	    : word_(word),
	      literal_(false),
	      matching_algorithm_(matching_algorithm) {
	}

	QueryNodeWord::~QueryNodeWord() = default;

	// Appends |word_| to |query|, followed by '*' when the word is not literal and
	// QueryParser::IsWordLongEnoughForPrefixSearch() accepts it. Always returns
	// 1, which callers add up as the number of words.
	int QueryNodeWord::AppendToSQLiteQuery(std::u16string* query) const {
	  query->append(word_);

	  // Use prefix search if we're not literal and long enough. The '*' marker
	  // is appended to the string |query| points at, not to the pointer.
	  const bool use_prefix_search =
	      !literal_ &&
	      QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_);
	  if (use_prefix_search)
	    query->push_back(u'*');
	  return 1;
	}

	// A word node is always a word (as opposed to a list of sub-nodes).
	bool QueryNodeWord::IsWord() const {
	  return true;
	}

	// Returns whether |word| matches this node's word. The comparison is plain
	// equality when |exact| is set or when |word_| is too short for prefix
	// search; otherwise |word| only has to begin with |word_|.
	bool QueryNodeWord::Matches(const std::u16string& word, bool exact) const {
	  const bool prefix_allowed =
	      !exact &&
	      QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_);
	  if (!prefix_allowed)
	    return word == word_;
	  if (word.size() < word_.size())
	    return false;
	  return word_.compare(0, word_.size(), word, 0, word_.size()) == 0;
	}

	// Appends to |match_positions| one entry for every element of |words| that
	// this node matches (non-exact matching). Returns true if at least one
	// element matched. Each entry spans from the matched word's position to that
	// position plus the length of |word_|, i.e. only the part covered by the
	// query word.
	bool QueryNodeWord::HasMatchIn(const QueryWordVector& words,
	                               Snippet::MatchPositions* match_positions) const {
	  bool matched = false;
	  for (const auto& query_word : words) {
	    if (!Matches(query_word.word, false))
	      continue;
	    const size_t match_start = query_word.position;
	    // The length is added as size_t; narrowing it to int first served no
	    // purpose.
	    match_positions->push_back(
	        Snippet::MatchPosition(match_start, match_start + word_.size()));
	    matched = true;
	  }
	  return matched;
	}

	bool QueryNodeWord::HasMatchIn(const QueryWordVector& words, bool exact) const {
	return base::ranges::any_of(words, [&](const auto& query_word) {
	return Matches(query_word.word, exact);
	});
	}

	// Adds a copy of this node's word to the end of |words|.
	void QueryNodeWord::AppendWords(std::vector<std::u16string>* words) const {
	  words->emplace_back(word_);
	}

	// A QueryNodeList has a collection of QueryNodes which are deleted in the end.
	// It is the non-word kind of node: IsWord() returns false for it.
	class QueryNodeList : public QueryNode {
	 public:
	  QueryNodeList();

	  QueryNodeList(const QueryNodeList&) = delete;
	  QueryNodeList& operator=(const QueryNodeList&) = delete;

	  ~QueryNodeList() override;

	  // Gives mutable access to the child vector.
	  QueryNodeVector* children() { return &children_; }

	  // Appends |node| as the last child, taking ownership of it.
	  void AddChild(std::unique_ptr<QueryNode> node);

	  // Remove empty subnodes left over from other parsing.
	  void RemoveEmptySubnodes();

	  // QueryNode:
	  int AppendToSQLiteQuery(std::u16string* query) const override;
	  bool IsWord() const override;
	  // Matches() and both HasMatchIn() overloads are not supported on a plain
	  // list: they hit NOTREACHED() and return false.
	  bool Matches(const std::u16string& word, bool exact) const override;
	  bool HasMatchIn(const QueryWordVector& words,
	                  Snippet::MatchPositions* match_positions) const override;
	  bool HasMatchIn(const QueryWordVector& words, bool exact) const override;
	  void AppendWords(std::vector<std::u16string>* words) const override;

	 protected:
	  // Appends every child's SQLite form to |query|, separated by single spaces,
	  // and returns the sum of the children's return values.
	  int AppendChildrenToString(std::u16string* query) const;

	  // The child nodes, in insertion order.
	  QueryNodeVector children_;
	};

	// Nothing to set up or tear down explicitly: |children_| manages itself.
	QueryNodeList::QueryNodeList() = default;

	QueryNodeList::~QueryNodeList() = default;

	// Takes ownership of |node| and stores it as the last child.
	void QueryNodeList::AddChild(std::unique_ptr<QueryNode> node) {
	  children_.push_back(std::move(node));
	}

	void QueryNodeList::RemoveEmptySubnodes() {
	QueryNodeVector kept_children;
	for (size_t i = 0; i < children_.size(); ++i) {
	if (children_[i]->IsWord()) {
	kept_children.push_back(std::move(children_[i]));
	continue;
	}

	QueryNodeList* list_node = static_cast<QueryNodeList*>(children_[i].get());
	list_node->RemoveEmptySubnodes();
	if (!list_node->children()->empty())
	kept_children.push_back(std::move(children_[i]));
	}
	children_.swap(kept_children);
	}

	// A plain list is rendered as its children joined by spaces; returns the
	// word count reported by AppendChildrenToString().
	int QueryNodeList::AppendToSQLiteQuery(std::u16string* query) const {
	  return AppendChildrenToString(query);
	}

	// A list is never a word.
	bool QueryNodeList::IsWord() const {
	  return false;
	}

	// Not supported on a list node: hits NOTREACHED() and returns false.
	bool QueryNodeList::Matches(const std::u16string& word, bool exact) const {
	  NOTREACHED();
	  return false;
	}

	// Not supported on a list node: hits NOTREACHED() and returns false.
	// |match_positions| is left untouched.
	bool QueryNodeList::HasMatchIn(const QueryWordVector& words,
	                               Snippet::MatchPositions* match_positions) const {
	  NOTREACHED();
	  return false;
	}

	// Not supported on a list node: hits NOTREACHED() and returns false.
	bool QueryNodeList::HasMatchIn(const QueryWordVector& words, bool exact) const {
	  NOTREACHED();
	  return false;
	}

	// Asks every child, in order, to append its words to |words|.
	void QueryNodeList::AppendWords(std::vector<std::u16string>* words) const {
	  for (const auto& child : children_)
	    child->AppendWords(words);
	}

	// Appends the SQLite form of every child to |query|, inserting one space
	// between consecutive children. Returns the sum of the values returned by
	// the children's AppendToSQLiteQuery() calls.
	int QueryNodeList::AppendChildrenToString(std::u16string* query) const {
	  int num_words = 0;
	  bool first = true;
	  for (const auto& child : children_) {
	    // The separator is a char16_t literal to match the UTF-16 string.
	    if (!first)
	      query->push_back(u' ');
	    first = false;
	    num_words += child->AppendToSQLiteQuery(query);
	  }
	  return num_words;
	}

	// A QueryNodePhrase is a phrase query ("quoted"). Its children must match
	// consecutive words, in order, for the phrase to match.
	class QueryNodePhrase : public QueryNodeList {
	 public:
	  QueryNodePhrase();

	  QueryNodePhrase(const QueryNodePhrase&) = delete;
	  QueryNodePhrase& operator=(const QueryNodePhrase&) = delete;

	  ~QueryNodePhrase() override;

	  // QueryNodeList:
	  int AppendToSQLiteQuery(std::u16string* query) const override;
	  bool HasMatchIn(const QueryWordVector& words,
	                  Snippet::MatchPositions* match_positions) const override;
	  bool HasMatchIn(const QueryWordVector& words, bool exact) const override;

	 private:
	  // Looks for the first run of consecutive entries in |words| that the
	  // children match exactly, in order. On success sets |*first_word| and
	  // |*last_word| to the first and last entry of that run and returns true.
	  bool MatchesAll(const QueryWordVector& words,
	                  const QueryWord** first_word,
	                  const QueryWord** last_word) const;
	};

	// The phrase node adds no state of its own to QueryNodeList.
	QueryNodePhrase::QueryNodePhrase() = default;

	QueryNodePhrase::~QueryNodePhrase() = default;

	// Appends the children to |query| wrapped in a pair of '"' characters and
	// returns the word count reported by AppendChildrenToString(). The quotes
	// are char16_t literals to match the UTF-16 string they are appended to.
	int QueryNodePhrase::AppendToSQLiteQuery(std::u16string* query) const {
	  query->push_back(u'"');
	  const int num_words = AppendChildrenToString(query);
	  query->push_back(u'"');
	  return num_words;
	}

	// Searches |words| for the first run of consecutive entries that the
	// children match exactly, child j against the j-th entry of the run. On
	// success stores pointers to the first and last entry of the run in
	// |*first_word| / |*last_word| and returns true. Otherwise returns false and
	// leaves both out-parameters untouched.
	bool QueryNodePhrase::MatchesAll(const QueryWordVector& words,
	                                 const QueryWord** first_word,
	                                 const QueryWord** last_word) const {
	  const size_t phrase_length = children_.size();
	  // A phrase without children has no first/last word to report; without this
	  // guard the code below would index |words| at position -1.
	  if (phrase_length == 0 || words.size() < phrase_length)
	    return false;

	  const size_t last_start = words.size() - phrase_length;
	  for (size_t start = 0; start <= last_start; ++start) {
	    // Count how many leading children match at this starting offset.
	    size_t matched = 0;
	    while (matched < phrase_length &&
	           children_[matched]->Matches(words[start + matched].word, true)) {
	      ++matched;
	    }
	    if (matched == phrase_length) {
	      *first_word = &words[start];
	      *last_word = &words[start + phrase_length - 1];
	      return true;
	    }
	  }
	  return false;
	}

	// If the phrase occurs in |words|, appends one match position spanning from
	// the start of its first word to the end of its last word and returns true.
	// Otherwise returns false without touching |match_positions|.
	bool QueryNodePhrase::HasMatchIn(
	    const QueryWordVector& words,
	    Snippet::MatchPositions* match_positions) const {
	  const QueryWord* first_word = nullptr;
	  const QueryWord* last_word = nullptr;
	  if (!MatchesAll(words, &first_word, &last_word))
	    return false;

	  const auto phrase_begin = first_word->position;
	  const auto phrase_end = last_word->position + last_word->word.length();
	  match_positions->push_back(Snippet::MatchPosition(phrase_begin, phrase_end));
	  return true;
	}

	bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words,
	bool exact) const {
	const QueryWord* first_word;
	const QueryWord* last_word;
	return MatchesAll(words, &first_word, &last_word);
	}

	// static
	// Decides whether |word| qualifies for prefix search. Always true under
	// ALWAYS_PREFIX_SEARCH. Otherwise the word needs at least 3 code units, or
	// at least 2 when its first code unit lies in 0xAC00..0xD7A3.
	bool QueryParser::IsWordLongEnoughForPrefixSearch(
	    const std::u16string& word,
	    MatchingAlgorithm matching_algorithm) {
	  if (matching_algorithm == MatchingAlgorithm::ALWAYS_PREFIX_SEARCH)
	    return true;

	  DCHECK(!word.empty());
	  // We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
	  // because they 'behave like' Latin letters. Moreover, we should
	  // normalize the former before reaching here.
	  const bool shorter_minimum_applies = 0xAC00 <= word[0] && word[0] <= 0xD7A3;
	  const size_t minimum_length = shorter_minimum_applies ? 2 : 3;
	  return word.size() >= minimum_length;
	}

	// static
	// Parses |query| and appends its SQLite form to |sqlite_query|. Returns the
	// value reported by the root node's AppendToSQLiteQuery(), or 0 (leaving
	// |sqlite_query| untouched) when parsing fails.
	int QueryParser::ParseQuery(const std::u16string& query,
	                            MatchingAlgorithm matching_algorithm,
	                            std::u16string* sqlite_query) {
	  QueryNodeList root;
	  const bool parsed = ParseQueryImpl(query, matching_algorithm, &root);
	  return parsed ? root.AppendToSQLiteQuery(sqlite_query) : 0;
	}

	// static
	// Parses |query| and appends every word of the resulting tree to |words|.
	// Nothing is appended when parsing fails.
	void QueryParser::ParseQueryWords(const std::u16string& query,
	                                  MatchingAlgorithm matching_algorithm,
	                                  std::vector<std::u16string>* words) {
	  QueryNodeList root;
	  if (ParseQueryImpl(query, matching_algorithm, &root))
	    root.AppendWords(words);
	}

	// static
	// Parses the lowercased form of |query| and, on success, swaps the parsed
	// top-level nodes into |nodes|. |nodes| is left untouched on failure.
	void QueryParser::ParseQueryNodes(const std::u16string& query,
	                                  MatchingAlgorithm matching_algorithm,
	                                  QueryNodeVector* nodes) {
	  QueryNodeList root;
	  const std::u16string lowered_query = base::i18n::ToLower(query);
	  if (!ParseQueryImpl(lowered_query, matching_algorithm, &root))
	    return;
	  nodes->swap(*root.children());
	}

	// static
	// Returns true when every node in |find_nodes| has a match among the words
	// of the lowercased |find_in_text|. On success |match_positions| receives
	// the sorted, coalesced match positions, or is cleared when lowercasing
	// changed the text length. Returns false, leaving |match_positions|
	// untouched, when |find_nodes| is empty, the text has no words, or some
	// node does not match.
	bool QueryParser::DoesQueryMatch(const std::u16string& find_in_text,
	                                 const QueryNodeVector& find_nodes,
	                                 Snippet::MatchPositions* match_positions) {
	  if (find_nodes.empty())
	    return false;

	  const std::u16string lower_find_in_text = base::i18n::ToLower(find_in_text);
	  QueryWordVector query_words;
	  ExtractQueryWords(lower_find_in_text, &query_words);
	  if (query_words.empty())
	    return false;

	  Snippet::MatchPositions matches;
	  for (const auto& find_node : find_nodes) {
	    if (!find_node->HasMatchIn(query_words, &matches))
	      return false;
	  }

	  const bool positions_usable =
	      lower_find_in_text.length() == find_in_text.length();
	  if (positions_usable) {
	    SortAndCoalesceMatchPositions(&matches);
	    match_positions->swap(matches);
	  } else {
	    // The lower case string differs from the original string. The matches are
	    // meaningless.
	    // TODO(sky): we need a better way to align the positions so that we don't
	    // completely punt here.
	    match_positions->clear();
	  }
	  return true;
	}

	// static
	// Returns true when both inputs are non-empty and every node in
	// |find_nodes| has a match in |find_in_words|; |exact| is forwarded to each
	// node's HasMatchIn().
	bool QueryParser::DoesQueryMatch(const QueryWordVector& find_in_words,
	                                 const QueryNodeVector& find_nodes,
	                                 bool exact) {
	  if (find_nodes.empty() || find_in_words.empty())
	    return false;
	  return base::ranges::all_of(find_nodes, [&](const auto& find_node) {
	    return find_node->HasMatchIn(find_in_words, exact);
	  });
	}

	// static
	bool QueryParser::ParseQueryImpl(const std::u16string& query,
	MatchingAlgorithm matching_algorithm,
	QueryNodeList* root) {
	base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
	// TODO(evanm): support a locale here
	if (!iter.Init())
	return false;

	// To handle nesting, we maintain a stack of QueryNodeLists.
	// The last element (back) of the stack contains the current, deepest node.
	std::vector<QueryNodeList*> query_stack;
	query_stack.push_back(root);

	bool in_quotes = false; // whether we're currently in a quoted phrase
	while (iter.Advance()) {
	// Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
	// is not necessarily a word, but could also be a sequence of punctuation
	// or whitespace.
	if (iter.IsWord()) {
	std::unique_ptr<QueryNodeWord> word_node =
	std::make_unique<QueryNodeWord>(iter.GetString(), matching_algorithm);
	if (in_quotes)
	word_node->set_literal(true);
	query_stack.back()->AddChild(std::move(word_node));
	} else { // Punctuation.
	if (IsQueryQuote(query[iter.prev()])) {
	if (!in_quotes) {
	std::unique_ptr<QueryNodeList> quotes_node =
	std::make_unique<QueryNodePhrase>();
	QueryNodeList* quotes_node_ptr = quotes_node.get();
	query_stack.back()->AddChild(std::move(quotes_node));
	query_stack.push_back(quotes_node_ptr);
	in_quotes = true;
	} else {
	query_stack.pop_back(); // Stop adding to the quoted phrase.
	in_quotes = false;
	}
	}
	}
	}

	root->RemoveEmptySubnodes();
	return true;
	}

	// static
	// Splits the already-lowercased |text| with a word break iterator and
	// appends one QueryWord, holding the word and its start offset in |text|,
	// for every non-empty word span. Appends nothing if the iterator fails to
	// initialize.
	void QueryParser::ExtractQueryWords(const std::u16string& text,
	                                    QueryWordVector* words) {
	  DCHECK(text == base::i18n::ToLower(text))
	      << "The caller must have already lowercased `text`. Value = "
	      << base::UTF16ToUTF8(text);
	  base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
	  // TODO(evanm): support a locale here
	  if (!iter.Init())
	    return;

	  while (iter.Advance()) {
	    // Just found a span between 'prev' (inclusive) and 'pos' (exclusive). It
	    // is not necessarily a word, but could also be a sequence of punctuation
	    // or whitespace.
	    if (!iter.IsWord())
	      continue;
	    std::u16string word = iter.GetString();
	    if (word.empty())
	      continue;
	    words->push_back(QueryWord());
	    auto& entry = words->back();
	    entry.word = word;
	    entry.position = iter.prev();
	  }
	}

	// static
	// Sorts |matches| by start position, then merges each entry with the
	// intersecting entries that follow it.
	void QueryParser::SortAndCoalesceMatchPositions(
	    Snippet::MatchPositions* matches) {
	  std::sort(matches->begin(), matches->end(), &CompareMatchPosition);
	  // WARNING: we index rather than iterate because CoalesceMatchesFrom may
	  // remove entries from |matches|; the size is re-read on every pass.
	  size_t i = 0;
	  while (i < matches->size()) {
	    CoalesceMatchesFrom(i, matches);
	    ++i;
	  }
	}

	} // namespace query_parser