// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 5 | #ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
| 6 | #define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 7 | |
#include <stddef.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/macros.h"
#include "components/bookmarks/browser/titled_url_node_sorter.h"
#include "components/query_parser/query_parser.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 19 | |
[email protected] | c45f562 | 2014-06-01 21:35:48 | [diff] [blame] | 20 | namespace bookmarks { |
| 21 | |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 22 | class TitledUrlNode; |
Scott Violet | 3c91400 | 2018-05-03 18:23:05 | [diff] [blame] | 23 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 24 | struct TitledUrlMatch; |
[email protected] | 19c602f1 | 2014-06-12 07:37:05 | [diff] [blame] | 25 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 26 | // TitledUrlIndex maintains an index of paired titles and URLs for quick lookup. |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 27 | // |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 28 | // TitledUrlIndex maintains the index (index_) as a map of sets. The map (type |
| 29 | // Index) maps from a lower case string to the set (type TitledUrlNodeSet) of |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 30 | // TitledUrlNodes that contain that string in their title or URL. |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 31 | class TitledUrlIndex { |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 32 | public: |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 33 | using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>; |
| 34 | |
mattreynolds | 80dc1ec | 2017-01-06 19:31:34 | [diff] [blame] | 35 | // Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list |
| 36 | // of matches when matches are returned from the index. If null, matches are |
| 37 | // returned unsorted. |
Scott Violet | 3c91400 | 2018-05-03 18:23:05 | [diff] [blame] | 38 | explicit TitledUrlIndex( |
| 39 | std::unique_ptr<TitledUrlNodeSorter> sorter = nullptr); |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 40 | ~TitledUrlIndex(); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 41 | |
Scott Violet | 3c91400 | 2018-05-03 18:23:05 | [diff] [blame] | 42 | void SetNodeSorter(std::unique_ptr<TitledUrlNodeSorter> sorter); |
| 43 | |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 44 | // Invoked when a title/URL pair has been added to the model. |
| 45 | void Add(const TitledUrlNode* node); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 46 | |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 47 | // Invoked when a title/URL pair has been removed from the model. |
| 48 | void Remove(const TitledUrlNode* node); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 49 | |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 50 | // Returns up to |max_count| of matches containing each term from the text |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 51 | // |query| in either the title, URL, or, if |match_ancestor_titles| is true, |
| 52 | // the titles of ancestor nodes. |matching_algorithm| determines the algorithm |
| 53 | // used by QueryParser internally to parse |query|. |
manukh | b9a9062b | 2020-10-16 22:51:17 | [diff] [blame] | 54 | std::vector<TitledUrlMatch> GetResultsMatching( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 55 | const std::u16string& query, |
manukh | b9a9062b | 2020-10-16 22:51:17 | [diff] [blame] | 56 | size_t max_count, |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 57 | query_parser::MatchingAlgorithm matching_algorithm, |
| 58 | bool match_ancestor_titles); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 59 | |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 60 | // For testing only. |
| 61 | TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 62 | const std::vector<std::u16string>& terms, |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 63 | query_parser::MatchingAlgorithm matching_algorithm) const { |
| 64 | return RetrieveNodesMatchingAllTerms(terms, matching_algorithm); |
| 65 | } |
| 66 | |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 67 | // For testing only. |
| 68 | TitledUrlNodeSet RetrieveNodesMatchingAnyTermsForTesting( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 69 | const std::vector<std::u16string>& terms, |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 70 | query_parser::MatchingAlgorithm matching_algorithm) const { |
| 71 | return RetrieveNodesMatchingAnyTerms(terms, matching_algorithm); |
| 72 | } |
| 73 | |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 74 | private: |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 75 | using TitledUrlNodes = std::vector<const TitledUrlNode*>; |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 76 | using Index = std::map<std::u16string, TitledUrlNodeSet>; |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 77 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 78 | // Constructs |sorted_nodes| by copying the matches in |matches| and sorting |
| 79 | // them. |
mattreynolds | 55324d6 | 2016-12-09 23:07:29 | [diff] [blame] | 80 | void SortMatches(const TitledUrlNodeSet& matches, |
| 81 | TitledUrlNodes* sorted_nodes) const; |
[email protected] | 2c685cc2 | 2009-08-28 00:17:44 | [diff] [blame] | 82 | |
manukh | b9a9062b | 2020-10-16 22:51:17 | [diff] [blame] | 83 | // Finds |query_nodes| matches in |node| and returns a TitledUrlMatch |
| 84 | // containing |node| and the matches. |
Anton Bikineev | 1156b5f | 2021-05-15 22:35:36 | [diff] [blame^] | 85 | absl::optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery( |
manukh | b9a9062b | 2020-10-16 22:51:17 | [diff] [blame] | 86 | const TitledUrlNode* node, |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 87 | const query_parser::QueryNodeVector& query_nodes, |
| 88 | bool match_ancestor_titles); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 89 | |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 90 | // Return matches for the specified |terms|. This is an intersection of each |
| 91 | // term's matches. |
| 92 | TitledUrlNodeSet RetrieveNodesMatchingAllTerms( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 93 | const std::vector<std::u16string>& terms, |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 94 | query_parser::MatchingAlgorithm matching_algorithm) const; |
| 95 | |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 96 | TitledUrlNodeSet RetrieveNodesMatchingAnyTerms( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 97 | const std::vector<std::u16string>& terms, |
manukh | 92134fdb | 2020-11-05 20:33:01 | [diff] [blame] | 98 | query_parser::MatchingAlgorithm matching_algorithm) const; |
| 99 | |
| 100 | // Return matches for the specified |term|. May return duplicates. |
| 101 | TitledUrlNodes RetrieveNodesMatchingTerm( |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 102 | const std::u16string& term, |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 103 | query_parser::MatchingAlgorithm matching_algorithm) const; |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 104 | |
| 105 | // Returns the set of query words from |query|. |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 106 | static std::vector<std::u16string> ExtractQueryWords( |
| 107 | const std::u16string& query); |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 108 | |
| 109 | // Return the index terms for |node|. |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 110 | static std::vector<std::u16string> ExtractIndexTerms( |
manukh | c7370b6 | 2020-10-29 20:22:13 | [diff] [blame] | 111 | const TitledUrlNode* node); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 112 | |
| 113 | // Adds |node| to |index_|. |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 114 | void RegisterNode(const std::u16string& term, const TitledUrlNode* node); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 115 | |
| 116 | // Removes |node| from |index_|. |
Jan Wilken Dörrie | fa241ba | 2021-03-11 17:57:01 | [diff] [blame] | 117 | void UnregisterNode(const std::u16string& term, const TitledUrlNode* node); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 118 | |
| 119 | Index index_; |
| 120 | |
mattreynolds | 191b8872 | 2016-12-13 19:25:32 | [diff] [blame] | 121 | std::unique_ptr<TitledUrlNodeSorter> sorter_; |
[email protected] | 2c685cc2 | 2009-08-28 00:17:44 | [diff] [blame] | 122 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 123 | DISALLOW_COPY_AND_ASSIGN(TitledUrlIndex); |
[email protected] | 85d911c | 2009-05-19 03:59:42 | [diff] [blame] | 124 | }; |
| 125 | |
[email protected] | c45f562 | 2014-06-01 21:35:48 | [diff] [blame] | 126 | } // namespace bookmarks |
| 127 | |
mattreynolds | 25e9a31 | 2016-12-14 21:52:13 | [diff] [blame] | 128 | #endif // COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |