[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 1 | // Copyright 2014 The Chromium Authors. All rights reserved. |
license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 4 | |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 5 | #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ |
| 6 | #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 7 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 8 | #include <vector> |
| 9 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 10 | #include "base/basictypes.h" |
[email protected] | d883056 | 2013-06-10 22:01:54 | [diff] [blame] | 11 | #include "base/strings/string16.h" |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 12 | #include "components/query_parser/snippet.h" |
| 13 | |
| 14 | namespace query_parser { |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 15 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 16 | class QueryNodeList; |
| 17 | |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 18 | // A word paired with its position in the text it came from. Used by HasMatchIn. |
| 19 | struct QueryWord { |
| 20 | // The word to match against. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 21 | base::string16 word; |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 22 | |
| 23 | // The starting position of the word in the original text. |
[email protected] | c29962f2 | 2008-12-03 00:47:58 | [diff] [blame] | 24 | size_t position; |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 25 | }; |
| 26 | |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 27 | enum class MatchingAlgorithm { |
| 28 | // Only words long enough (see QueryParser::IsWordLongEnoughForPrefixSearch) are |
| 29 | // considered for prefix search. Shorter words are considered for exact matches. |
| 30 | DEFAULT, |
| 31 | // All words are considered for a prefix search, regardless of their length. |
| 32 | ALWAYS_PREFIX_SEARCH, |
| 33 | }; |
| 34 | |
// A list of QueryWords, such as the one filled in by QueryParser::ExtractQueryWords.
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 35 | typedef std::vector<query_parser::QueryWord> QueryWordVector; |
| 36 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 37 | // QueryNode is used by QueryParser to represent the elements that constitute a |
| 38 | // query. While QueryNode is exposed by way of ParseQueryNodes, it really isn't |
| 39 | // meant for external usage. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 40 | class QueryNode { |
| 41 | public: |
| 42 | virtual ~QueryNode() {} |
| 43 | |
| 44 | // Serializes this node into |query|, a string that can be passed to SQLite. |
| 45 | // Returns the number of words in this node. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 46 | virtual int AppendToSQLiteQuery(base::string16* query) const = 0; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 47 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 48 | // Returns true if this is a QueryNodeWord, false if it's a QueryNodeList. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 49 | virtual bool IsWord() const = 0; |
| 50 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 51 | // Returns true if this node matches |word|. If |exact| is true, the string |
| 52 | // must exactly match. Otherwise, this uses a starts-with (prefix) comparison. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 53 | virtual bool Matches(const base::string16& word, bool exact) const = 0; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 54 | |
[email protected] | 2532060 | 2012-10-18 22:05:56 | [diff] [blame] | 55 | // Returns true if this node matches at least one of the words in |words|. An |
| 56 | // entry is added to |match_positions| for all matching words giving the |
| 57 | // matching regions. |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 58 | virtual bool HasMatchIn(const QueryWordVector& words, |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 59 | Snippet::MatchPositions* match_positions) const = 0; |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 60 | |
[email protected] | 5d592f0 | 2013-06-22 16:59:22 | [diff] [blame] | 61 | // Returns true if this node matches at least one of the words in |words|. This overload takes no |match_positions| to fill in. |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 62 | virtual bool HasMatchIn(const QueryWordVector& words) const = 0; |
[email protected] | 5d592f0 | 2013-06-22 16:59:22 | [diff] [blame] | 63 | |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 64 | // Appends the words that make up this node to |words|. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 65 | virtual void AppendWords(std::vector<base::string16>* words) const = 0; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 66 | }; |
| 67 | |
// A list of raw QueryNode pointers. QueryParser::ParseQueryNodes fills one in and
// documents that ownership of the nodes passes to the caller.
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 68 | typedef std::vector<query_parser::QueryNode*> QueryNodeStarVector; |
| 69 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 70 | // This class is used to parse queries entered into the history search into more |
| 71 | // normalized queries that can be passed to the SQLite backend. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 72 | class QueryParser { |
| 73 | public: |
| 74 | QueryParser(); |
| 75 | |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 76 | // For CJK ideographs and Korean Hangul, even a single character |
| 77 | // can be useful in prefix matching, but that may give us too many |
| 78 | // false positives. Moreover, the current ICU word breaker gives us |
| 79 | // back every single Chinese character as a word so that there's no |
| 80 | // point doing anything for them and we only adjust the minimum length |
| 81 | // to 2 for Korean Hangul while using 3 for others. This is a temporary |
| 82 | // hack until we have segmentation support. See MatchingAlgorithm for how |matching_algorithm| selects between prefix and exact matching. |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 83 | static bool IsWordLongEnoughForPrefixSearch( |
| 84 | const base::string16& word, |
| 85 | MatchingAlgorithm matching_algorithm); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 86 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 87 | // Parses |query| into a SQLite query. The resulting query is placed in |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 88 | // |sqlite_query| and the number of words is returned. |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 89 | int ParseQuery(const base::string16& query, |
| 90 | MatchingAlgorithm matching_algorithm, |
| 91 | base::string16* sqlite_query); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 92 | |
[email protected] | 8c793c8 | 2011-05-19 00:41:33 | [diff] [blame] | 93 | // Parses |query|, returning the words that make it up. Any words in quotes |
| 94 | // are put in |words| without the quotes. For example, the query text |
| 95 | // "foo bar" results in two entries being added to words, one for foo and one |
| 96 | // for bar. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 97 | void ParseQueryWords(const base::string16& query, |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 98 | MatchingAlgorithm matching_algorithm, |
[email protected] | d2065e06 | 2013-12-12 23:49:52 | [diff] [blame] | 99 | std::vector<base::string16>* words); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 100 | |
[email protected] | 8c793c8 | 2011-05-19 00:41:33 | [diff] [blame] | 101 | // Parses |query|, returning the nodes that constitute the valid words in the |
| 102 | // query. This is intended for later usage with DoesQueryMatch. Ownership of |
| 103 | // the nodes passes to the caller. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 104 | void ParseQueryNodes(const base::string16& query, |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 105 | MatchingAlgorithm matching_algorithm, |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 106 | QueryNodeStarVector* nodes); |
[email protected] | 7de9959 | 2008-12-09 19:16:02 | [diff] [blame] | 107 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 108 | // Returns true if the string |text| matches the query |nodes| created by a call |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 109 | // to ParseQueryNodes. If the query does match, each of the matching positions in |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 110 | // the text is added to |match_positions|. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 111 | bool DoesQueryMatch(const base::string16& text, |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 112 | const QueryNodeStarVector& nodes, |
[email protected] | 6956cd6 | 2008-08-29 19:48:58 | [diff] [blame] | 113 | Snippet::MatchPositions* match_positions); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 114 | |
[email protected] | 5d592f0 | 2013-06-22 16:59:22 | [diff] [blame] | 115 | // Returns true if all of the |words| match the query |nodes| created by a |
| 116 | // call to ParseQueryNodes. |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 117 | bool DoesQueryMatch(const QueryWordVector& words, |
| 118 | const QueryNodeStarVector& nodes); |
[email protected] | 5d592f0 | 2013-06-22 16:59:22 | [diff] [blame] | 119 | |
| 120 | // Extracts the words from |text|, placing each word into |words|. |
[email protected] | 439f1e3 | 2013-12-09 20:09:09 | [diff] [blame] | 121 | void ExtractQueryWords(const base::string16& text, |
[email protected] | b3a8489 | 2014-04-23 04:28:07 | [diff] [blame] | 122 | QueryWordVector* words); |
| 123 | |
| 124 | // Sorts the match positions in |matches| by their first index, then |
| 125 | // coalesces any match positions that intersect each other. |
| 126 | static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches); |
[email protected] | 5d592f0 | 2013-06-22 16:59:22 | [diff] [blame] | 127 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 128 | private: |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 129 | // Does the work of parsing |query|; creates nodes in |root| as appropriate. |
| 130 | // This is invoked from the public ParseQuery* methods (TODO confirm which ones; three are now declared). |
kkimlabs | f1a7a373 | 2014-11-04 10:30:46 | [diff] [blame^] | 131 | bool ParseQueryImpl(const base::string16& query, |
| 132 | MatchingAlgorithm matching_algorithm, |
| 133 | QueryNodeList* root); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 134 | |
[email protected] | 13f698d | 2011-05-12 21:55:45 | [diff] [blame] | 135 | DISALLOW_COPY_AND_ASSIGN(QueryParser); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 136 | }; |
| 137 | |
[email protected] | acf9f27 | 2014-04-15 23:04:00 | [diff] [blame] | 138 | } // namespace query_parser |
| 139 | |
| 140 | #endif // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_ |