blob: 5e3f18b26e6b0c6486668b3e12f0bce31470f14f [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit09911bf2008-07-26 23:55:294
[email protected]acf9f272014-04-15 23:04:005#ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
6#define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
initial.commit09911bf2008-07-26 23:55:297
avif57136c12015-12-25 23:27:458#include <stddef.h>
9
avi8e000a72016-11-02 18:06:2010#include <memory>
initial.commit09911bf2008-07-26 23:55:2911#include <vector>
12
avif57136c12015-12-25 23:27:4513#include "base/macros.h"
[email protected]d8830562013-06-10 22:01:5414#include "base/strings/string16.h"
[email protected]acf9f272014-04-15 23:04:0015#include "components/query_parser/snippet.h"
16
17namespace query_parser {
[email protected]6956cd62008-08-29 19:48:5818
initial.commit09911bf2008-07-26 23:55:2919class QueryNodeList;
20
[email protected]6956cd62008-08-29 19:48:5821// Used by HasMatchIn.
22struct QueryWord {
23 // The work to match against.
[email protected]439f1e32013-12-09 20:09:0924 base::string16 word;
[email protected]6956cd62008-08-29 19:48:5825
26 // The starting position of the word in the original text.
[email protected]c29962f22008-12-03 00:47:5827 size_t position;
[email protected]6956cd62008-08-29 19:48:5828};
29
// Selects which words of a query are considered for prefix search.
enum class MatchingAlgorithm {
  // Prefix search is applied only to words that are long enough; shorter
  // words are considered for exact matches.
  DEFAULT,
  // Prefix search is applied to every word.
  ALWAYS_PREFIX_SEARCH,
};
37
avi8e000a72016-11-02 18:06:2038using QueryWordVector = std::vector<query_parser::QueryWord>;
[email protected]b3a84892014-04-23 04:28:0739
[email protected]13f698d2011-05-12 21:55:4540// QueryNode is used by QueryParser to represent the elements that constitute a
41// query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
42// for external usage.
initial.commit09911bf2008-07-26 23:55:2943class QueryNode {
44 public:
45 virtual ~QueryNode() {}
46
47 // Serialize ourselves out to a string that can be passed to SQLite. Returns
48 // the number of words in this node.
[email protected]439f1e32013-12-09 20:09:0949 virtual int AppendToSQLiteQuery(base::string16* query) const = 0;
initial.commit09911bf2008-07-26 23:55:2950
[email protected]13f698d2011-05-12 21:55:4551 // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
initial.commit09911bf2008-07-26 23:55:2952 virtual bool IsWord() const = 0;
53
[email protected]13f698d2011-05-12 21:55:4554 // Returns true if this node matches |word|. If |exact| is true, the string
55 // must exactly match. Otherwise, this uses a starts with comparison.
[email protected]439f1e32013-12-09 20:09:0956 virtual bool Matches(const base::string16& word, bool exact) const = 0;
initial.commit09911bf2008-07-26 23:55:2957
[email protected]25320602012-10-18 22:05:5658 // Returns true if this node matches at least one of the words in |words|. An
59 // entry is added to |match_positions| for all matching words giving the
60 // matching regions.
[email protected]b3a84892014-04-23 04:28:0761 virtual bool HasMatchIn(const QueryWordVector& words,
[email protected]6956cd62008-08-29 19:48:5862 Snippet::MatchPositions* match_positions) const = 0;
[email protected]7de99592008-12-09 19:16:0263
[email protected]5d592f02013-06-22 16:59:2264 // Returns true if this node matches at least one of the words in |words|.
[email protected]b3a84892014-04-23 04:28:0765 virtual bool HasMatchIn(const QueryWordVector& words) const = 0;
[email protected]5d592f02013-06-22 16:59:2266
[email protected]7de99592008-12-09 19:16:0267 // Appends the words that make up this node in |words|.
[email protected]439f1e32013-12-09 20:09:0968 virtual void AppendWords(std::vector<base::string16>* words) const = 0;
initial.commit09911bf2008-07-26 23:55:2969};
70
avi8e000a72016-11-02 18:06:2071using QueryNodeVector = std::vector<std::unique_ptr<query_parser::QueryNode>>;
[email protected]b3a84892014-04-23 04:28:0772
[email protected]13f698d2011-05-12 21:55:4573// This class is used to parse queries entered into the history search into more
74// normalized queries that can be passed to the SQLite backend.
initial.commit09911bf2008-07-26 23:55:2975class QueryParser {
76 public:
77 QueryParser();
78
[email protected]85d911c2009-05-19 03:59:4279 // For CJK ideographs and Korean Hangul, even a single character
80 // can be useful in prefix matching, but that may give us too many
81 // false positives. Moreover, the current ICU word breaker gives us
82 // back every single Chinese character as a word so that there's no
83 // point doing anything for them and we only adjust the minimum length
84 // to 2 for Korean Hangul while using 3 for others. This is a temporary
85 // hack until we have a segmentation support.
kkimlabsf1a7a3732014-11-04 10:30:4686 static bool IsWordLongEnoughForPrefixSearch(
87 const base::string16& word,
88 MatchingAlgorithm matching_algorithm);
[email protected]85d911c2009-05-19 03:59:4289
initial.commit09911bf2008-07-26 23:55:2990 // Parse a query into a SQLite query. The resulting query is placed in
[email protected]13f698d2011-05-12 21:55:4591 // |sqlite_query| and the number of words is returned.
kkimlabsf1a7a3732014-11-04 10:30:4692 int ParseQuery(const base::string16& query,
93 MatchingAlgorithm matching_algorithm,
94 base::string16* sqlite_query);
initial.commit09911bf2008-07-26 23:55:2995
[email protected]8c793c82011-05-19 00:41:3396 // Parses |query|, returning the words that make up it. Any words in quotes
97 // are put in |words| without the quotes. For example, the query text
98 // "foo bar" results in two entries being added to words, one for foo and one
99 // for bar.
[email protected]439f1e32013-12-09 20:09:09100 void ParseQueryWords(const base::string16& query,
kkimlabsf1a7a3732014-11-04 10:30:46101 MatchingAlgorithm matching_algorithm,
[email protected]d2065e062013-12-12 23:49:52102 std::vector<base::string16>* words);
initial.commit09911bf2008-07-26 23:55:29103
[email protected]8c793c82011-05-19 00:41:33104 // Parses |query|, returning the nodes that constitute the valid words in the
105 // query. This is intended for later usage with DoesQueryMatch. Ownership of
106 // the nodes passes to the caller.
[email protected]439f1e32013-12-09 20:09:09107 void ParseQueryNodes(const base::string16& query,
kkimlabsf1a7a3732014-11-04 10:30:46108 MatchingAlgorithm matching_algorithm,
avi8e000a72016-11-02 18:06:20109 QueryNodeVector* nodes);
[email protected]7de99592008-12-09 19:16:02110
initial.commit09911bf2008-07-26 23:55:29111 // Returns true if the string text matches the query nodes created by a call
[email protected]13f698d2011-05-12 21:55:45112 // to ParseQuery. If the query does match, each of the matching positions in
[email protected]6956cd62008-08-29 19:48:58113 // the text is added to |match_positions|.
[email protected]439f1e32013-12-09 20:09:09114 bool DoesQueryMatch(const base::string16& text,
avi8e000a72016-11-02 18:06:20115 const QueryNodeVector& nodes,
[email protected]6956cd62008-08-29 19:48:58116 Snippet::MatchPositions* match_positions);
initial.commit09911bf2008-07-26 23:55:29117
[email protected]5d592f02013-06-22 16:59:22118 // Returns true if all of the |words| match the query |nodes| created by a
119 // call to ParseQuery.
[email protected]b3a84892014-04-23 04:28:07120 bool DoesQueryMatch(const QueryWordVector& words,
avi8e000a72016-11-02 18:06:20121 const QueryNodeVector& nodes);
[email protected]5d592f02013-06-22 16:59:22122
123 // Extracts the words from |text|, placing each word into |words|.
[email protected]439f1e32013-12-09 20:09:09124 void ExtractQueryWords(const base::string16& text,
[email protected]b3a84892014-04-23 04:28:07125 QueryWordVector* words);
126
127 // Sorts the match positions in |matches| by their first index, then
128 // coalesces any match positions that intersect each other.
129 static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches);
[email protected]5d592f02013-06-22 16:59:22130
initial.commit09911bf2008-07-26 23:55:29131 private:
[email protected]13f698d2011-05-12 21:55:45132 // Does the work of parsing |query|; creates nodes in |root| as appropriate.
133 // This is invoked from both of the ParseQuery methods.
kkimlabsf1a7a3732014-11-04 10:30:46134 bool ParseQueryImpl(const base::string16& query,
135 MatchingAlgorithm matching_algorithm,
136 QueryNodeList* root);
initial.commit09911bf2008-07-26 23:55:29137
[email protected]13f698d2011-05-12 21:55:45138 DISALLOW_COPY_AND_ASSIGN(QueryParser);
initial.commit09911bf2008-07-26 23:55:29139};
140
[email protected]acf9f272014-04-15 23:04:00141} // namespace query_parser
142
143#endif // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_