| // Copyright 2014 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
| #define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
| |
| #include <stddef.h> |
| |
| #include <map> |
| #include <memory> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include "base/containers/flat_set.h" |
| #include "base/feature_list.h" |
| #include "components/bookmarks/browser/titled_url_node_sorter.h" |
| #include "components/query_parser/query_parser.h" |
| #include "third_party/abseil-cpp/absl/types/optional.h" |
| |
| namespace bookmarks { |
| |
| // Forward declarations. Within this header, TitledUrlNode is only referred to |
| // through `const TitledUrlNode*`, and TitledUrlMatch only appears in the |
| // return types of function declarations, so the full definitions are not |
| // needed here. |
| class TitledUrlNode; |
| |
| struct TitledUrlMatch; |
| |
| // TitledUrlIndex maintains an index of paired titles and URLs for quick lookup. |
| // |
| // TitledUrlIndex maintains the index (index_) as a map of sets. The map (type |
| // Index) maps from a lower case string to the set (type TitledUrlNodeSet) of |
| // TitledUrlNodes that contain that string in their title or URL. |
| // |
| // A second map (path_index_) counts how often each term occurs in folder |
| // paths; see the comment on that member for details. |
| // |
| // The class is neither copy-constructible nor copy-assignable. |
| class TitledUrlIndex { |
| public: |
| // Set of pointers to const nodes, ordered and de-duplicated by pointer value. |
| // NOTE(review): the pointers are presumably non-owning, with node lifetime |
| // managed by the bookmark model - confirm against the implementation. |
| using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>; |
| |
| // Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list |
| // of matches when matches are returned from the index. If null, matches are |
| // returned unsorted. |
| explicit TitledUrlIndex( |
| std::unique_ptr<TitledUrlNodeSorter> sorter = nullptr); |
| |
| // Copying is disabled. |
| TitledUrlIndex(const TitledUrlIndex&) = delete; |
| TitledUrlIndex& operator=(const TitledUrlIndex&) = delete; |
| |
| ~TitledUrlIndex(); |
| |
| // Takes ownership of |sorter| (it is passed by std::unique_ptr value). |
| // NOTE(review): presumably replaces the sorter given to the constructor and |
| // affects how subsequently returned matches are ordered - confirm against |
| // the implementation. |
| void SetNodeSorter(std::unique_ptr<TitledUrlNodeSorter> sorter); |
| |
| // Invoked when a title/URL pair has been added to the model. |
| void Add(const TitledUrlNode* node); |
| |
| // Invoked when a title/URL pair has been removed from the model. |
| void Remove(const TitledUrlNode* node); |
| |
| // Invoked when a folder has been added to the model. |
| void AddPath(const TitledUrlNode* node); |
| |
| // Invoked when a folder has been removed from the model. |
| void RemovePath(const TitledUrlNode* node); |
| |
| // Returns up to `max_count` of matches containing each term from the text |
| // `query` in either the title, URL, or the titles of ancestor nodes. |
| // `matching_algorithm` determines the algorithm used by QueryParser |
| // internally to parse `query`. |
| std::vector<TitledUrlMatch> GetResultsMatching( |
| const std::u16string& query, |
| size_t max_count, |
| query_parser::MatchingAlgorithm matching_algorithm); |
| |
| // Returns a normalized version of the UTF16 string `text`. If it fails to |
| // normalize the string, returns `text` itself as a best-effort. |
| static std::u16string Normalize(const std::u16string& text); |
| |
| private: |
| // Grants TitledUrlIndexFake access to the private members below. |
| // NOTE(review): presumably a test double - confirm. |
| friend class TitledUrlIndexFake; |
| |
| // Sequence of nodes. Unlike TitledUrlNodeSet, a vector keeps insertion order |
| // and can hold duplicates (see RetrieveNodesMatchingTerm()). |
| using TitledUrlNodes = std::vector<const TitledUrlNode*>; |
| // Maps a term to the set of nodes registered for it; std::map keeps the |
| // terms ordered by key. |
| using Index = std::map<std::u16string, TitledUrlNodeSet>; |
| |
| // Constructs |sorted_nodes| by copying the matches in |matches| and sorting |
| // them. |sorted_nodes| is an output parameter. |
| void SortMatches(const TitledUrlNodeSet& matches, |
| TitledUrlNodes* sorted_nodes) const; |
| |
| // For each node, calls `MatchTitledUrlNodeWithQuery()` and returns the |
| // aggregated `TitledUrlMatch`s. |
| // NOTE(review): `max_count` presumably caps the number of returned matches, |
| // mirroring `GetResultsMatching()` - confirm. |
| std::vector<TitledUrlMatch> MatchTitledUrlNodesWithQuery( |
| const TitledUrlNodes& nodes, |
| const query_parser::QueryNodeVector& query_nodes, |
| const std::vector<std::u16string>& query_terms, |
| size_t max_count); |
| |
| // Finds |query_nodes| matches in |node| and returns a TitledUrlMatch |
| // containing |node| and the matches. |
| // NOTE(review): presumably returns absl::nullopt when |node| does not match |
| // the query - confirm against the implementation. |
| absl::optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery( |
| const TitledUrlNode* node, |
| const query_parser::QueryNodeVector& query_nodes, |
| const std::vector<std::u16string>& query_terms); |
| |
| // Return matches for the specified |terms|. This is an intersection of each |
| // term's matches. |
| TitledUrlNodeSet RetrieveNodesMatchingAllTerms( |
| const std::vector<std::u16string>& terms, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Return matches for the specified `terms`. This is approximately a union of |
| // each term's match, with some limitations to avoid too many nodes being |
| // returned: terms shorter than `term_min_length` or matching more than |
| // `max_nodes_per_term` nodes won't have their nodes accumulated by union; and |
| // accumulation is capped to `max_nodes`. Guaranteed to include any node |
| // `RetrieveNodesMatchingAllTerms()` includes. |
| // NOTE(review): `term_min_length` and `max_nodes_per_term` are not |
| // parameters of this function; presumably they are constants defined in the |
| // implementation file - confirm. |
| TitledUrlNodeSet RetrieveNodesMatchingAnyTerms( |
| const std::vector<std::u16string>& terms, |
| query_parser::MatchingAlgorithm matching_algorithm, |
| size_t max_nodes) const; |
| |
| // Return matches for the specified |term|. May return duplicates. |
| TitledUrlNodes RetrieveNodesMatchingTerm( |
| const std::u16string& term, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Return true if `term` matches any path in `path_index_`. |
| bool DoesTermMatchPath( |
| const std::u16string& term, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Returns the query words extracted from |query|, as a vector. |
| static std::vector<std::u16string> ExtractQueryWords( |
| const std::u16string& query); |
| |
| // Return the index terms for |node|. |
| static std::vector<std::u16string> ExtractIndexTerms( |
| const TitledUrlNode* node); |
| |
| // Adds |node| to |index_|. |
| // NOTE(review): presumably inserted into the set stored under |term| - |
| // confirm. |
| void RegisterNode(const std::u16string& term, const TitledUrlNode* node); |
| |
| // Removes |node| from |index_|. |
| // NOTE(review): presumably erased from the set stored under |term| - |
| // confirm. |
| void UnregisterNode(const std::u16string& term, const TitledUrlNode* node); |
| |
| // A map of terms and the nodes containing those terms in their titles or |
| // URLs. E.g., given 2 bookmarks titled 'x y x' and 'x z', `index_` would |
| // contain: `{ x: set[node1, node2], y: set[node1], z: set[node2] }`. |
| Index index_; |
| // A map of terms and the number of times it occurs in paths. E.g., given |
| // 2 paths 'bookmarks bar/x y x/x' and 'bookmarks bar/x z/x', `path_index_` |
| // would contain `{ bookmarks: 2, bar: 2, x: 4, y: 1, z: 1 }`. Note, 'x' has |
| // count 4, since it occurred twice in each path. Doesn't track actual |
| // bookmark nodes, as the latter would need large updates when moving |
| // folders. Tracks counts so terms can be unindexed when the last containing |
| // folder is renamed or deleted. Updated on folder rename, creation, and |
| // deletion; not updated on bookmark or folder move. Used to short circuit |
| // unioning per-term matches when matching paths, as intersecting results in |
| // much fewer nodes. |
| std::map<std::u16string, size_t> path_index_; |
| |
| // Owned sorter used to order matches. May be null, in which case matches are |
| // returned unsorted (see the constructor comment). |
| std::unique_ptr<TitledUrlNodeSorter> sorter_; |
| }; |
| |
| } // namespace bookmarks |
| |
| #endif // COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |