blob: 6420879ddd9e06d47838e3dea3be4ef6170b4663 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
mattreynolds25e9a312016-12-14 21:52:135#ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_
6#define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_
[email protected]85d911c2009-05-19 03:59:427
#include <stddef.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "base/containers/flat_set.h"
#include "base/macros.h"
#include "components/bookmarks/browser/titled_url_node_sorter.h"
#include "components/query_parser/query_parser.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
[email protected]85d911c2009-05-19 03:59:4219
[email protected]c45f5622014-06-01 21:35:4820namespace bookmarks {
21
mattreynolds55324d62016-12-09 23:07:2922class TitledUrlNode;
Scott Violet3c914002018-05-03 18:23:0523
mattreynolds25e9a312016-12-14 21:52:1324struct TitledUrlMatch;
[email protected]19c602f12014-06-12 07:37:0525
mattreynolds25e9a312016-12-14 21:52:1326// TitledUrlIndex maintains an index of paired titles and URLs for quick lookup.
[email protected]85d911c2009-05-19 03:59:4227//
mattreynolds25e9a312016-12-14 21:52:1328// TitledUrlIndex maintains the index (index_) as a map of sets. The map (type
29// Index) maps from a lower case string to the set (type TitledUrlNodeSet) of
mattreynolds55324d62016-12-09 23:07:2930// TitledUrlNodes that contain that string in their title or URL.
mattreynolds25e9a312016-12-14 21:52:1331class TitledUrlIndex {
[email protected]85d911c2009-05-19 03:59:4232 public:
manukhc7370b62020-10-29 20:22:1333 using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
34
mattreynolds80dc1ec2017-01-06 19:31:3435 // Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list
36 // of matches when matches are returned from the index. If null, matches are
37 // returned unsorted.
Scott Violet3c914002018-05-03 18:23:0538 explicit TitledUrlIndex(
39 std::unique_ptr<TitledUrlNodeSorter> sorter = nullptr);
mattreynolds25e9a312016-12-14 21:52:1340 ~TitledUrlIndex();
[email protected]85d911c2009-05-19 03:59:4241
Scott Violet3c914002018-05-03 18:23:0542 void SetNodeSorter(std::unique_ptr<TitledUrlNodeSorter> sorter);
43
mattreynolds55324d62016-12-09 23:07:2944 // Invoked when a title/URL pair has been added to the model.
45 void Add(const TitledUrlNode* node);
[email protected]85d911c2009-05-19 03:59:4246
mattreynolds55324d62016-12-09 23:07:2947 // Invoked when a title/URL pair has been removed from the model.
48 void Remove(const TitledUrlNode* node);
[email protected]85d911c2009-05-19 03:59:4249
mattreynolds55324d62016-12-09 23:07:2950 // Returns up to |max_count| of matches containing each term from the text
manukh92134fdb2020-11-05 20:33:0151 // |query| in either the title, URL, or, if |match_ancestor_titles| is true,
52 // the titles of ancestor nodes. |matching_algorithm| determines the algorithm
53 // used by QueryParser internally to parse |query|.
manukhb9a9062b2020-10-16 22:51:1754 std::vector<TitledUrlMatch> GetResultsMatching(
Jan Wilken Dörriefa241ba2021-03-11 17:57:0155 const std::u16string& query,
manukhb9a9062b2020-10-16 22:51:1756 size_t max_count,
manukh92134fdb2020-11-05 20:33:0157 query_parser::MatchingAlgorithm matching_algorithm,
58 bool match_ancestor_titles);
[email protected]85d911c2009-05-19 03:59:4259
manukhc7370b62020-10-29 20:22:1360 // For testing only.
61 TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting(
Jan Wilken Dörriefa241ba2021-03-11 17:57:0162 const std::vector<std::u16string>& terms,
manukhc7370b62020-10-29 20:22:1363 query_parser::MatchingAlgorithm matching_algorithm) const {
64 return RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
65 }
66
manukh92134fdb2020-11-05 20:33:0167 // For testing only.
68 TitledUrlNodeSet RetrieveNodesMatchingAnyTermsForTesting(
Jan Wilken Dörriefa241ba2021-03-11 17:57:0169 const std::vector<std::u16string>& terms,
manukh92134fdb2020-11-05 20:33:0170 query_parser::MatchingAlgorithm matching_algorithm) const {
71 return RetrieveNodesMatchingAnyTerms(terms, matching_algorithm);
72 }
73
[email protected]85d911c2009-05-19 03:59:4274 private:
mattreynolds55324d62016-12-09 23:07:2975 using TitledUrlNodes = std::vector<const TitledUrlNode*>;
Jan Wilken Dörriefa241ba2021-03-11 17:57:0176 using Index = std::map<std::u16string, TitledUrlNodeSet>;
[email protected]85d911c2009-05-19 03:59:4277
mattreynolds25e9a312016-12-14 21:52:1378 // Constructs |sorted_nodes| by copying the matches in |matches| and sorting
79 // them.
mattreynolds55324d62016-12-09 23:07:2980 void SortMatches(const TitledUrlNodeSet& matches,
81 TitledUrlNodes* sorted_nodes) const;
[email protected]2c685cc22009-08-28 00:17:4482
manukhb9a9062b2020-10-16 22:51:1783 // Finds |query_nodes| matches in |node| and returns a TitledUrlMatch
84 // containing |node| and the matches.
Anton Bikineev1156b5f2021-05-15 22:35:3685 absl::optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery(
manukhb9a9062b2020-10-16 22:51:1786 const TitledUrlNode* node,
manukh92134fdb2020-11-05 20:33:0187 const query_parser::QueryNodeVector& query_nodes,
88 bool match_ancestor_titles);
[email protected]85d911c2009-05-19 03:59:4289
manukhc7370b62020-10-29 20:22:1390 // Return matches for the specified |terms|. This is an intersection of each
91 // term's matches.
92 TitledUrlNodeSet RetrieveNodesMatchingAllTerms(
Jan Wilken Dörriefa241ba2021-03-11 17:57:0193 const std::vector<std::u16string>& terms,
manukhc7370b62020-10-29 20:22:1394 query_parser::MatchingAlgorithm matching_algorithm) const;
95
manukh92134fdb2020-11-05 20:33:0196 TitledUrlNodeSet RetrieveNodesMatchingAnyTerms(
Jan Wilken Dörriefa241ba2021-03-11 17:57:0197 const std::vector<std::u16string>& terms,
manukh92134fdb2020-11-05 20:33:0198 query_parser::MatchingAlgorithm matching_algorithm) const;
99
100 // Return matches for the specified |term|. May return duplicates.
101 TitledUrlNodes RetrieveNodesMatchingTerm(
Jan Wilken Dörriefa241ba2021-03-11 17:57:01102 const std::u16string& term,
manukhc7370b62020-10-29 20:22:13103 query_parser::MatchingAlgorithm matching_algorithm) const;
[email protected]85d911c2009-05-19 03:59:42104
105 // Returns the set of query words from |query|.
Jan Wilken Dörriefa241ba2021-03-11 17:57:01106 static std::vector<std::u16string> ExtractQueryWords(
107 const std::u16string& query);
manukhc7370b62020-10-29 20:22:13108
109 // Return the index terms for |node|.
Jan Wilken Dörriefa241ba2021-03-11 17:57:01110 static std::vector<std::u16string> ExtractIndexTerms(
manukhc7370b62020-10-29 20:22:13111 const TitledUrlNode* node);
[email protected]85d911c2009-05-19 03:59:42112
113 // Adds |node| to |index_|.
Jan Wilken Dörriefa241ba2021-03-11 17:57:01114 void RegisterNode(const std::u16string& term, const TitledUrlNode* node);
[email protected]85d911c2009-05-19 03:59:42115
116 // Removes |node| from |index_|.
Jan Wilken Dörriefa241ba2021-03-11 17:57:01117 void UnregisterNode(const std::u16string& term, const TitledUrlNode* node);
[email protected]85d911c2009-05-19 03:59:42118
119 Index index_;
120
mattreynolds191b88722016-12-13 19:25:32121 std::unique_ptr<TitledUrlNodeSorter> sorter_;
[email protected]2c685cc22009-08-28 00:17:44122
mattreynolds25e9a312016-12-14 21:52:13123 DISALLOW_COPY_AND_ASSIGN(TitledUrlIndex);
[email protected]85d911c2009-05-19 03:59:42124};
125
[email protected]c45f5622014-06-01 21:35:48126} // namespace bookmarks
127
mattreynolds25e9a312016-12-14 21:52:13128#endif // COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_