components/omnibox/browser/url_index_private_data.cc - chromium/src.git - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "components/omnibox/browser/url_index_private_data.h"

 #include <stdint.h>

 #include <functional>
 #include <iterator>
 #include <limits>
 #include <numeric>
 #include <string>
 #include <vector>

 #include "base/files/file_util.h"
 #include "base/i18n/break_iterator.h"
 #include "base/i18n/case_conversion.h"
 #include "base/macros.h"
 #include "base/metrics/histogram_macros.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/time/time.h"
 #include "components/bookmarks/browser/bookmark_model.h"
 #include "components/bookmarks/browser/bookmark_utils.h"
 #include "components/history/core/browser/history_database.h"
 #include "components/history/core/browser/history_db_task.h"
 #include "components/history/core/browser/history_service.h"
 #include "components/omnibox/browser/in_memory_url_index.h"
 #include "components/search_engines/template_url_service.h"
 #include "components/url_formatter/url_formatter.h"

 #if defined(USE_SYSTEM_PROTOBUF)
 #include <google/protobuf/repeated_field.h>
 #else
 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
 #endif

 using google::protobuf::RepeatedField;
 using google::protobuf::RepeatedPtrField;
 using in_memory_url_index::InMemoryURLIndexCacheItem;

 namespace {
 static const size_t kMaxVisitsToStoreInCache = 10u;
 }

 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem
     WordListItem;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry
     WordMapEntry;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem
     CharWordMapItem;
 typedef in_memory_url_index::
     InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry CharWordMapEntry;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
     WordIDHistoryMapItem;
 typedef in_memory_url_index::
     InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
         WordIDHistoryMapEntry;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_HistoryInfoMapItem
     HistoryInfoMapItem;
 typedef in_memory_url_index::
     InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
         HistoryInfoMapEntry;
 typedef in_memory_url_index::
     InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo
         HistoryInfoMapEntry_VisitInfo;
 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem
     WordStartsMapItem;
 typedef in_memory_url_index::
     InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
         WordStartsMapEntry;

 // Algorithm Functions ---------------------------------------------------------

 // Comparison function for sorting search terms by descending length.
 bool LengthGreater(const base::string16& string_a,
                    const base::string16& string_b) {
   return string_a.length() > string_b.length();
 }


 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------

 // HistoryDBTask used to update the recent visit data for a particular
 // row from the history database.
 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {
  public:
   explicit UpdateRecentVisitsFromHistoryDBTask(
       URLIndexPrivateData* private_data,
       history::URLID url_id);

   bool RunOnDBThread(history::HistoryBackend* backend,
                      history::HistoryDatabase* db) override;
   void DoneRunOnMainThread() override;

  private:
   ~UpdateRecentVisitsFromHistoryDBTask() override;

   // The URLIndexPrivateData that gets updated after the historyDB
   // task returns.
   URLIndexPrivateData* private_data_;
   // The ID of the URL to get visits for and then update.
   history::URLID url_id_;
   // Whether fetching the recent visits for the URL succeeded.
   bool succeeded_;
   // The awaited data that's shown to private_data_ for it to copy and
   // store.
   history::VisitVector recent_visits_;

   DISALLOW_COPY_AND_ASSIGN(UpdateRecentVisitsFromHistoryDBTask);
 };

 UpdateRecentVisitsFromHistoryDBTask::UpdateRecentVisitsFromHistoryDBTask(
     URLIndexPrivateData* private_data,
     history::URLID url_id)
     : private_data_(private_data), url_id_(url_id), succeeded_(false) {
 }

 bool UpdateRecentVisitsFromHistoryDBTask::RunOnDBThread(
     history::HistoryBackend* backend,
     history::HistoryDatabase* db) {
   // Make sure the private data is going to get as many recent visits as
   // ScoredHistoryMatch::GetFrequency() hopes to use.
   DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
   succeeded_ = db->GetMostRecentVisitsForURL(url_id_,
                                              kMaxVisitsToStoreInCache,
                                              &recent_visits_);
   if (!succeeded_)
     recent_visits_.clear();
   return true;  // Always claim to be done; do not retry failures.
 }

 void UpdateRecentVisitsFromHistoryDBTask::DoneRunOnMainThread() {
   if (succeeded_)
     private_data_->UpdateRecentVisits(url_id_, recent_visits_);
 }

 UpdateRecentVisitsFromHistoryDBTask::~UpdateRecentVisitsFromHistoryDBTask() {
 }


 // URLIndexPrivateData ---------------------------------------------------------

 URLIndexPrivateData::URLIndexPrivateData()
     : restored_cache_version_(0),
       saved_cache_version_(kCurrentCacheFileVersion),
       pre_filter_item_count_(0),
       post_filter_item_count_(0),
       post_scoring_item_count_(0) {
 }

 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
     base::string16 original_search_string,
     size_t cursor_position,
     size_t max_matches,
     bookmarks::BookmarkModel* bookmark_model,
     TemplateURLService* template_url_service) {
   pre_filter_item_count_ = 0;
   post_filter_item_count_ = 0;
   post_scoring_item_count_ = 0;

   // This list will contain the original search string and any other string
   // transformations.
   String16Vector search_strings;
   search_strings.push_back(original_search_string);
   if ((cursor_position != base::string16::npos) &&
       (cursor_position < original_search_string.length()) &&
       (cursor_position > 0)) {
     // The original search_string broken at cursor position. This is one type of
     // transformation.
     base::string16 transformed_search_string(original_search_string);
     transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
     search_strings.push_back(transformed_search_string);
   }

   ScoredHistoryMatches scored_items;
   // Invalidate the term cache and return if we have indexed no words (probably
   // because we've not been initialized yet).
   if (word_list_.empty()) {
     search_term_cache_.clear();
     return scored_items;
   }
   // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
   // approach.
   ResetSearchTermCache();

   for (const base::string16& search_string : search_strings) {
     // The search string we receive may contain escaped characters. For reducing
     // the index we need individual, lower-cased words, ignoring escapings. For
     // the final filtering we need whitespace separated substrings possibly
     // containing escaped characters.
     base::string16 lower_raw_string(base::i18n::ToLower(search_string));
     base::string16 lower_unescaped_string = net::UnescapeURLComponent(
         lower_raw_string,
         net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
             net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

     // Extract individual 'words' (as opposed to 'terms'; see below) from the
     // search string. When the user types "colspec=ID%20Mstone Release" we get
     // four 'words': "colspec", "id", "mstone" and "release".
     String16Vector lower_words(
         String16VectorFromString16(lower_unescaped_string, false, nullptr));
     if (lower_words.empty())
       continue;
     HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
     pre_filter_item_count_ += history_id_set.size();
     // Trim the candidate pool if it is large. Note that we do not filter out
     // items that do not contain the search terms as proper substrings --
     // doing so is the performance-costly operation we are trying to avoid in
     // order to maintain omnibox responsiveness.
     const size_t kItemsToScoreLimit = 500;
     if (history_id_set.size() > kItemsToScoreLimit) {
       HistoryIDVector history_ids;
       std::copy(history_id_set.begin(), history_id_set.end(),
                 std::back_inserter(history_ids));
       // Trim down the set by sorting by typed-count, visit-count, and last
       // visit.
       HistoryItemFactorGreater item_factor_functor(history_info_map_);
       std::partial_sort(history_ids.begin(),
                         history_ids.begin() + kItemsToScoreLimit,
                         history_ids.end(), item_factor_functor);
       history_id_set.clear();
       std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
                 std::inserter(history_id_set, history_id_set.end()));
       post_filter_item_count_ += history_id_set.size();
     } else {
       post_filter_item_count_ += pre_filter_item_count_;
     }
     // Pass over all of the candidates filtering out any without a proper
     // substring match, inserting those which pass in order by score. Note that
     // in this step we are using the raw search string complete with escaped
     // URL elements. When the user has specifically typed something akin to
     // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
     // specific substring appears in the URL or page title.

     // We call these 'terms' (as opposed to 'words'; see above) as in this case
     // we only want to break up the search string on 'true' whitespace rather
     // than escaped whitespace. When the user types "colspec=ID%20Mstone
     // Release" we get two 'terms': "colspec=id%20mstone" and "release".
     String16Vector lower_raw_terms =
         base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
                           base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

     // Don't score matches when there are no terms to score against.  (It's
     // possible that the word break iterater that extracts words to search
     // for in the database allows some whitespace "words" whereas SplitString
     // excludes a long list of whitespace.)  One could write a scoring function
     // that gives a reasonable order to matches when there are no terms (i.e.,
     // all the words are some form of whitespace), but this is such a rare edge
     // case that it's not worth the time.
     if (lower_raw_terms.empty())
       continue;
     ScoredHistoryMatches temp_scored_items =
         std::for_each(history_id_set.begin(), history_id_set.end(),
                       AddHistoryMatch(bookmark_model, template_url_service,
                                       *this, lower_raw_string, lower_raw_terms,
                                       base::Time::Now()))
             .ScoredMatches();
     scored_items.insert(scored_items.end(), temp_scored_items.begin(),
                         temp_scored_items.end());
   }
   // Select and sort only the top |max_matches| results.
   if (scored_items.size() > max_matches) {
     std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
                       scored_items.end(),
                       ScoredHistoryMatch::MatchScoreGreater);
     scored_items.resize(max_matches);
   } else {
     std::sort(scored_items.begin(), scored_items.end(),
               ScoredHistoryMatch::MatchScoreGreater);
   }
   post_scoring_item_count_ = scored_items.size();
   if (pre_filter_item_count_ > post_filter_item_count_) {
     // If we trim the results set we do not want to cache the results for next
     // time as the user's ultimately desired result could easily be eliminated
     // in the early rough filter.
     search_term_cache_.clear();
   } else {
     // Remove any stale SearchTermCacheItems.
     for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
          cache_iter != search_term_cache_.end(); ) {
       if (!cache_iter->second.used_)
         search_term_cache_.erase(cache_iter++);
       else
         ++cache_iter;
     }
   }

   return scored_items;
 }

 bool URLIndexPrivateData::UpdateURL(
     history::HistoryService* history_service,
     const history::URLRow& row,
     const std::set<std::string>& scheme_whitelist,
     base::CancelableTaskTracker* tracker) {
   // The row may or may not already be in our index. If it is not already
   // indexed and it qualifies then it gets indexed. If it is already
   // indexed and still qualifies then it gets updated, otherwise it
   // is deleted from the index.
   bool row_was_updated = false;
   history::URLID row_id = row.id();
   HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
   if (row_pos == history_info_map_.end()) {
     // This new row should be indexed if it qualifies.
     history::URLRow new_row(row);
     new_row.set_id(row_id);
     row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
                       IndexRow(nullptr,
                                history_service,
                                new_row,
                                scheme_whitelist,
                                tracker);
   } else if (RowQualifiesAsSignificant(row, base::Time())) {
     // This indexed row still qualifies and will be re-indexed.
     // The url won't have changed but the title, visit count, etc.
     // might have changed.
     history::URLRow& row_to_update = row_pos->second.url_row;
     bool title_updated = row_to_update.title() != row.title();
     if (row_to_update.visit_count() != row.visit_count() ||
         row_to_update.typed_count() != row.typed_count() ||
         row_to_update.last_visit() != row.last_visit() || title_updated) {
       row_to_update.set_visit_count(row.visit_count());
       row_to_update.set_typed_count(row.typed_count());
       row_to_update.set_last_visit(row.last_visit());
       // If something appears to have changed, update the recent visits
       // information.
       ScheduleUpdateRecentVisits(history_service, row_id, tracker);
       // While the URL is guaranteed to remain stable, the title may have
       // changed. If so, then update the index with the changed words.
       if (title_updated) {
         // Clear all words associated with this row and re-index both the
         // URL and title.
         RemoveRowWordsFromIndex(row_to_update);
         row_to_update.set_title(row.title());
         RowWordStarts word_starts;
         AddRowWordsToIndex(row_to_update, &word_starts);
         word_starts_map_[row_id] = word_starts;
       }
       row_was_updated = true;
     }
   } else {
     // This indexed row no longer qualifies and will be de-indexed by
     // clearing all words associated with this row.
     RemoveRowFromIndex(row);
     row_was_updated = true;
   }
   if (row_was_updated)
     search_term_cache_.clear();  // This invalidates the cache.
   return row_was_updated;
 }

 void URLIndexPrivateData::UpdateRecentVisits(
     history::URLID url_id,
     const history::VisitVector& recent_visits) {
   HistoryInfoMap::iterator row_pos = history_info_map_.find(url_id);
   if (row_pos != history_info_map_.end()) {
     VisitInfoVector* visits = &row_pos->second.visits;
     visits->clear();
     const size_t size =
         std::min(recent_visits.size(), kMaxVisitsToStoreInCache);
     visits->reserve(size);
     for (size_t i = 0; i < size; i++) {
       // Copy from the history::VisitVector the only fields visits needs.
       visits->push_back(std::make_pair(recent_visits[i].visit_time,
                                        recent_visits[i].transition));
     }
   }
   // Else: Oddly, the URL doesn't seem to exist in the private index.
   // Ignore this update.  This can happen if, for instance, the user
   // removes the URL from URLIndexPrivateData before the historyDB call
   // returns.
 }

 void URLIndexPrivateData::ScheduleUpdateRecentVisits(
     history::HistoryService* history_service,
     history::URLID url_id,
     base::CancelableTaskTracker* tracker) {
   history_service->ScheduleDBTask(
       std::unique_ptr<history::HistoryDBTask>(
           new UpdateRecentVisitsFromHistoryDBTask(this, url_id)),
       tracker);
 }

 // Helper functor for DeleteURL.
 class HistoryInfoMapItemHasURL {
  public:
   explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

   bool operator()(const std::pair<const HistoryID, HistoryInfoMapValue>& item) {
     return item.second.url_row.url() == url_;
   }

  private:
   const GURL& url_;
 };

 bool URLIndexPrivateData::DeleteURL(const GURL& url) {
   // Find the matching entry in the history_info_map_.
   HistoryInfoMap::iterator pos = std::find_if(
       history_info_map_.begin(),
       history_info_map_.end(),
       HistoryInfoMapItemHasURL(url));
   if (pos == history_info_map_.end())
     return false;
   RemoveRowFromIndex(pos->second.url_row);
   search_term_cache_.clear();  // This invalidates the cache.
   return true;
 }

 // static
 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
     const base::FilePath& file_path) {
   base::TimeTicks beginning_time = base::TimeTicks::Now();
   if (!base::PathExists(file_path))
     return nullptr;
   std::string data;
   // If there is no cache file then simply give up. This will cause us to
   // attempt to rebuild from the history database.
   if (!base::ReadFileToString(file_path, &data))
     return nullptr;

   scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
   InMemoryURLIndexCacheItem index_cache;
   if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
     LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
                  << file_path.value();
     return restored_data;
   }

   if (!restored_data->RestorePrivateData(index_cache))
     return nullptr;

   UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
                       base::TimeTicks::Now() - beginning_time);
   UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
                        restored_data->history_id_word_map_.size());
   UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
                              restored_data->word_map_.size());
   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
                              restored_data->char_word_map_.size());
   if (restored_data->Empty())
     return nullptr;  // 'No data' is the same as a failed reload.
   return restored_data;
 }

 // static
 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
     history::HistoryDatabase* history_db,
     const std::set<std::string>& scheme_whitelist) {
   if (!history_db)
     return nullptr;

   base::TimeTicks beginning_time = base::TimeTicks::Now();

   scoped_refptr<URLIndexPrivateData>
       rebuilt_data(new URLIndexPrivateData);
   history::URLDatabase::URLEnumerator history_enum;
   if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
     return nullptr;
   rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();
   for (history::URLRow row; history_enum.GetNextURL(&row);) {
     rebuilt_data->IndexRow(
         history_db, nullptr, row, scheme_whitelist, nullptr);
   }

   UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
                       base::TimeTicks::Now() - beginning_time);
   UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
                        rebuilt_data->history_id_word_map_.size());
   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
                              rebuilt_data->word_map_.size());
   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
                              rebuilt_data->char_word_map_.size());
   return rebuilt_data;
 }

 // static
 bool URLIndexPrivateData::WritePrivateDataToCacheFileTask(
     scoped_refptr<URLIndexPrivateData> private_data,
     const base::FilePath& file_path) {
   DCHECK(private_data.get());
   DCHECK(!file_path.empty());
   return private_data->SaveToFile(file_path);
 }

 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
   scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
   data_copy->last_time_rebuilt_from_history_ = last_time_rebuilt_from_history_;
   data_copy->word_list_ = word_list_;
   data_copy->available_words_ = available_words_;
   data_copy->word_map_ = word_map_;
   data_copy->char_word_map_ = char_word_map_;
   data_copy->word_id_history_map_ = word_id_history_map_;
   data_copy->history_id_word_map_ = history_id_word_map_;
   data_copy->history_info_map_ = history_info_map_;
   data_copy->word_starts_map_ = word_starts_map_;
   return data_copy;
   // Not copied:
   //    search_term_cache_
   //    pre_filter_item_count_
   //    post_filter_item_count_
   //    post_scoring_item_count_
 }

 bool URLIndexPrivateData::Empty() const {
   return history_info_map_.empty();
 }

 void URLIndexPrivateData::Clear() {
   last_time_rebuilt_from_history_ = base::Time();
   word_list_.clear();
   available_words_.clear();
   word_map_.clear();
   char_word_map_.clear();
   word_id_history_map_.clear();
   history_id_word_map_.clear();
   history_info_map_.clear();
   word_starts_map_.clear();
 }

 URLIndexPrivateData::~URLIndexPrivateData() {}

 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
     const String16Vector& unsorted_words) {
   // Break the terms down into individual terms (words), get the candidate
   // set for each term, and intersect each to get a final candidate list.
   // Note that a single 'term' from the user's perspective might be
   // a string like "http://www.somewebsite.com" which, from our perspective,
   // is four words: 'http', 'www', 'somewebsite', and 'com'.
   HistoryIDSet history_id_set;
   String16Vector words(unsorted_words);
   // Sort the words into the longest first as such are likely to narrow down
   // the results quicker. Also, single character words are the most expensive
   // to process so save them for last.
   std::sort(words.begin(), words.end(), LengthGreater);
   for (String16Vector::iterator iter = words.begin(); iter != words.end();
        ++iter) {
     base::string16 uni_word = *iter;
     HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
     if (term_history_set.empty()) {
       history_id_set.clear();
       break;
     }
     if (iter == words.begin()) {
       history_id_set.swap(term_history_set);
     } else {
       HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(
           history_id_set, term_history_set);
       history_id_set.swap(new_history_id_set);
     }
   }
   return history_id_set;
 }

 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
     const base::string16& term) {
   if (term.empty())
     return HistoryIDSet();

   // TODO(mrossetti): Consider optimizing for very common terms such as
   // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
   // occuring words in the user's searches.

   size_t term_length = term.length();
   WordIDSet word_id_set;
   if (term_length > 1) {
     // See if this term or a prefix thereof is present in the cache.
     base::string16 term_lower = base::i18n::ToLower(term);
     SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
     for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
          cache_iter != search_term_cache_.end(); ++cache_iter) {
       if (base::StartsWith(term_lower,
                            base::i18n::ToLower(cache_iter->first),
                            base::CompareCase::SENSITIVE) &&
           (best_prefix == search_term_cache_.end() ||
            cache_iter->first.length() > best_prefix->first.length()))
         best_prefix = cache_iter;
     }

     // If a prefix was found then determine the leftover characters to be used
     // for further refining the results from that prefix.
     Char16Set prefix_chars;
     base::string16 leftovers(term);
     if (best_prefix != search_term_cache_.end()) {
       // If the prefix is an exact match for the term then grab the cached
       // results and we're done.
       size_t prefix_length = best_prefix->first.length();
       if (prefix_length == term_length) {
         best_prefix->second.used_ = true;
         return best_prefix->second.history_id_set_;
       }

       // Otherwise we have a handy starting point.
       // If there are no history results for this prefix then we can bail early
       // as there will be no history results for the full term.
       if (best_prefix->second.history_id_set_.empty()) {
         search_term_cache_[term] = SearchTermCacheItem();
         return HistoryIDSet();
       }
       word_id_set = best_prefix->second.word_id_set_;
       prefix_chars = Char16SetFromString16(best_prefix->first);
       leftovers = term.substr(prefix_length);
     }

     // Filter for each remaining, unique character in the term.
     Char16Set leftover_chars = Char16SetFromString16(leftovers);
     Char16Set unique_chars =
         base::STLSetDifference<Char16Set>(leftover_chars, prefix_chars);

     // Reduce the word set with any leftover, unprocessed characters.
     if (!unique_chars.empty()) {
       WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
       // We might come up empty on the leftovers.
       if (leftover_set.empty()) {
         search_term_cache_[term] = SearchTermCacheItem();
         return HistoryIDSet();
       }
       // Or there may not have been a prefix from which to start.
       if (prefix_chars.empty()) {
         word_id_set.swap(leftover_set);
       } else {
         WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
             word_id_set, leftover_set);
         word_id_set.swap(new_word_id_set);
       }
     }

     // We must filter the word list because the resulting word set surely
     // contains words which do not have the search term as a proper subset.
     for (WordIDSet::iterator word_set_iter = word_id_set.begin();
          word_set_iter != word_id_set.end(); ) {
       if (word_list_[*word_set_iter].find(term) == base::string16::npos)
         word_id_set.erase(word_set_iter++);
       else
         ++word_set_iter;
     }
   } else {
     word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
   }

   // If any words resulted then we can compose a set of history IDs by unioning
   // the sets from each word.
   HistoryIDSet history_id_set;
   if (!word_id_set.empty()) {
     for (WordIDSet::iterator word_id_iter = word_id_set.begin();
          word_id_iter != word_id_set.end(); ++word_id_iter) {
       WordID word_id = *word_id_iter;
       WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
       if (word_iter != word_id_history_map_.end()) {
         HistoryIDSet& word_history_id_set(word_iter->second);
         history_id_set.insert(word_history_id_set.begin(),
                               word_history_id_set.end());
       }
     }
   }

   // Record a new cache entry for this word if the term is longer than
   // a single character.
   if (term_length > 1)
     search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);

   return history_id_set;
 }

 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
     const Char16Set& term_chars) {
   WordIDSet word_id_set;
   for (Char16Set::const_iterator c_iter = term_chars.begin();
        c_iter != term_chars.end(); ++c_iter) {
     CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
     if (char_iter == char_word_map_.end()) {
       // A character was not found so there are no matching results: bail.
       word_id_set.clear();
       break;
     }
     WordIDSet& char_word_id_set(char_iter->second);
     // It is possible for there to no longer be any words associated with
     // a particular character. Give up in that case.
     if (char_word_id_set.empty()) {
       word_id_set.clear();
       break;
     }

     if (c_iter == term_chars.begin()) {
       // First character results becomes base set of results.
       word_id_set = char_word_id_set;
     } else {
       // Subsequent character results get intersected in.
       WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
           word_id_set, char_word_id_set);
       word_id_set.swap(new_word_id_set);
     }
   }
   return word_id_set;
 }

 bool URLIndexPrivateData::IndexRow(
     history::HistoryDatabase* history_db,
     history::HistoryService* history_service,
     const history::URLRow& row,
     const std::set<std::string>& scheme_whitelist,
     base::CancelableTaskTracker* tracker) {
   const GURL& gurl(row.url());

   // Index only URLs with a whitelisted scheme.
   if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
     return false;

   history::URLID row_id = row.id();
   // Strip out username and password before saving and indexing.
   base::string16 url(url_formatter::FormatUrl(
       gurl, url_formatter::kFormatUrlOmitUsernamePassword,
       net::UnescapeRule::NONE, nullptr, nullptr, nullptr));

   HistoryID history_id = static_cast<HistoryID>(row_id);
   DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

   // Add the row for quick lookup in the history info store.
   history::URLRow new_row(GURL(url), row_id);
   new_row.set_visit_count(row.visit_count());
   new_row.set_typed_count(row.typed_count());
   new_row.set_last_visit(row.last_visit());
   new_row.set_title(row.title());
   history_info_map_[history_id].url_row = new_row;

   // Index the words contained in the URL and title of the row.
   RowWordStarts word_starts;
   AddRowWordsToIndex(new_row, &word_starts);
   word_starts_map_[history_id] = word_starts;

   // Update the recent visits information or schedule the update
   // as appropriate.
   if (history_db) {
     // We'd like to check that we're on the history DB thread.
     // However, unittest code actually calls this on the UI thread.
     // So we don't do any thread checks.
     history::VisitVector recent_visits;
     // Make sure the private data is going to get as many recent visits as
     // ScoredHistoryMatch::GetFrequency() hopes to use.
     DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
     if (history_db->GetMostRecentVisitsForURL(row_id,
                                               kMaxVisitsToStoreInCache,
                                               &recent_visits))
       UpdateRecentVisits(row_id, recent_visits);
   } else {
     DCHECK(tracker);
     DCHECK(history_service);
     ScheduleUpdateRecentVisits(history_service, row_id, tracker);
   }

   return true;
 }

 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,
                                              RowWordStarts* word_starts) {
   HistoryID history_id = static_cast<HistoryID>(row.id());
   // Split URL into individual, unique words then add in the title words.
   const GURL& gurl(row.url());
   const base::string16& url =
       bookmarks::CleanUpUrlForMatching(gurl, nullptr);
   String16Set url_words = String16SetFromString16(url,
       word_starts ? &word_starts->url_word_starts_ : nullptr);
   const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
   String16Set title_words = String16SetFromString16(title,
       word_starts ? &word_starts->title_word_starts_ : nullptr);
   String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);
   for (String16Set::iterator word_iter = words.begin();
        word_iter != words.end(); ++word_iter)
     AddWordToIndex(*word_iter, history_id);

   search_term_cache_.clear();  // Invalidate the term cache.
 }

 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
                                          HistoryID history_id) {
   WordMap::iterator word_pos = word_map_.find(term);
   if (word_pos != word_map_.end())
     UpdateWordHistory(word_pos->second, history_id);
   else
     AddWordHistory(term, history_id);
 }

 void URLIndexPrivateData::AddWordHistory(const base::string16& term,
                                          HistoryID history_id) {
   WordID word_id = word_list_.size();
   if (available_words_.empty()) {
     word_list_.push_back(term);
   } else {
     word_id = *(available_words_.begin());
     word_list_[word_id] = term;
     available_words_.erase(word_id);
   }
   word_map_[term] = word_id;

   HistoryIDSet history_id_set;
   history_id_set.insert(history_id);
   word_id_history_map_[word_id] = history_id_set;
   AddToHistoryIDWordMap(history_id, word_id);

   // For each character in the newly added word (i.e. a word that is not
   // already in the word index), add the word to the character index.
   Char16Set characters = Char16SetFromString16(term);
   for (Char16Set::iterator uni_char_iter = characters.begin();
        uni_char_iter != characters.end(); ++uni_char_iter) {
     base::char16 uni_char = *uni_char_iter;
     CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
     if (char_iter != char_word_map_.end()) {
       // Update existing entry in the char/word index.
       WordIDSet& word_id_set(char_iter->second);
       word_id_set.insert(word_id);
     } else {
       // Create a new entry in the char/word index.
       WordIDSet word_id_set;
       word_id_set.insert(word_id);
       char_word_map_[uni_char] = word_id_set;
     }
   }
 }

 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
                                             HistoryID history_id) {
   WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
   DCHECK(history_pos != word_id_history_map_.end());
   HistoryIDSet& history_id_set(history_pos->second);
   history_id_set.insert(history_id);
   AddToHistoryIDWordMap(history_id, word_id);
 }

 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
                                                 WordID word_id) {
   HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
   if (iter != history_id_word_map_.end()) {
     WordIDSet& word_id_set(iter->second);
     word_id_set.insert(word_id);
   } else {
     WordIDSet word_id_set;
     word_id_set.insert(word_id);
     history_id_word_map_[history_id] = word_id_set;
   }
 }

 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
   RemoveRowWordsFromIndex(row);
   HistoryID history_id = static_cast<HistoryID>(row.id());
   history_info_map_.erase(history_id);
   word_starts_map_.erase(history_id);
 }

 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
   // Remove the entries in history_id_word_map_ and word_id_history_map_ for
   // this row.
   HistoryID history_id = static_cast<HistoryID>(row.id());
   WordIDSet word_id_set = history_id_word_map_[history_id];
   history_id_word_map_.erase(history_id);

   // Reconcile any changes to word usage.
   for (WordIDSet::iterator word_id_iter = word_id_set.begin();
        word_id_iter != word_id_set.end(); ++word_id_iter) {
     WordID word_id = *word_id_iter;
     word_id_history_map_[word_id].erase(history_id);
     if (!word_id_history_map_[word_id].empty())
       continue;  // The word is still in use.

     // The word is no longer in use. Reconcile any changes to character usage.
     base::string16 word = word_list_[word_id];
     Char16Set characters = Char16SetFromString16(word);
     for (Char16Set::iterator uni_char_iter = characters.begin();
          uni_char_iter != characters.end(); ++uni_char_iter) {
       base::char16 uni_char = *uni_char_iter;
       char_word_map_[uni_char].erase(word_id);
       if (char_word_map_[uni_char].empty())
         char_word_map_.erase(uni_char);  // No longer in use.
     }

     // Complete the removal of references to the word.
     word_id_history_map_.erase(word_id);
     word_map_.erase(word);
     word_list_[word_id] = base::string16();
     available_words_.insert(word_id);
   }
 }

 void URLIndexPrivateData::ResetSearchTermCache() {
   for (SearchTermCacheMap::iterator iter = search_term_cache_.begin();
        iter != search_term_cache_.end(); ++iter)
     iter->second.used_ = false;
 }

 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
   base::TimeTicks beginning_time = base::TimeTicks::Now();
   InMemoryURLIndexCacheItem index_cache;
   SavePrivateData(&index_cache);
   std::string data;
   if (!index_cache.SerializeToString(&data)) {
     LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
     return false;
   }

   int size = data.size();
   if (base::WriteFile(file_path, data.c_str(), size) != size) {
     LOG(WARNING) << "Failed to write " << file_path.value();
     return false;
   }
   UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
                       base::TimeTicks::Now() - beginning_time);
   return true;
 }

 void URLIndexPrivateData::SavePrivateData(
     InMemoryURLIndexCacheItem* cache) const {
   DCHECK(cache);
   cache->set_last_rebuild_timestamp(
       last_time_rebuilt_from_history_.ToInternalValue());
   cache->set_version(saved_cache_version_);
   // history_item_count_ is no longer used but rather than change the protobuf
   // definition use a placeholder. This will go away with the switch to SQLite.
   cache->set_history_item_count(0);
   SaveWordList(cache);
   SaveWordMap(cache);
   SaveCharWordMap(cache);
   SaveWordIDHistoryMap(cache);
   SaveHistoryInfoMap(cache);
   SaveWordStartsMap(cache);
 }

 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
   if (word_list_.empty())
     return;
   WordListItem* list_item = cache->mutable_word_list();
   list_item->set_word_count(word_list_.size());
   for (String16Vector::const_iterator iter = word_list_.begin();
        iter != word_list_.end(); ++iter)
     list_item->add_word(base::UTF16ToUTF8(*iter));
 }

 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
   if (word_map_.empty())
     return;
   WordMapItem* map_item = cache->mutable_word_map();
   map_item->set_item_count(word_map_.size());
   for (WordMap::const_iterator iter = word_map_.begin();
        iter != word_map_.end(); ++iter) {
     WordMapEntry* map_entry = map_item->add_word_map_entry();
     map_entry->set_word(base::UTF16ToUTF8(iter->first));
     map_entry->set_word_id(iter->second);
   }
 }

 void URLIndexPrivateData::SaveCharWordMap(
     InMemoryURLIndexCacheItem* cache) const {
   if (char_word_map_.empty())
     return;
   CharWordMapItem* map_item = cache->mutable_char_word_map();
   map_item->set_item_count(char_word_map_.size());
   for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
        iter != char_word_map_.end(); ++iter) {
     CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
     map_entry->set_char_16(iter->first);
     const WordIDSet& word_id_set(iter->second);
     map_entry->set_item_count(word_id_set.size());
     for (WordIDSet::const_iterator set_iter = word_id_set.begin();
          set_iter != word_id_set.end(); ++set_iter)
       map_entry->add_word_id(*set_iter);
   }
 }

 void URLIndexPrivateData::SaveWordIDHistoryMap(
     InMemoryURLIndexCacheItem* cache) const {
   if (word_id_history_map_.empty())
     return;
   WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
   map_item->set_item_count(word_id_history_map_.size());
   for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
        iter != word_id_history_map_.end(); ++iter) {
     WordIDHistoryMapEntry* map_entry =
         map_item->add_word_id_history_map_entry();
     map_entry->set_word_id(iter->first);
     const HistoryIDSet& history_id_set(iter->second);
     map_entry->set_item_count(history_id_set.size());
     for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
          set_iter != history_id_set.end(); ++set_iter)
       map_entry->add_history_id(*set_iter);
   }
 }

 void URLIndexPrivateData::SaveHistoryInfoMap(
     InMemoryURLIndexCacheItem* cache) const {
   if (history_info_map_.empty())
     return;
   HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
   map_item->set_item_count(history_info_map_.size());
   for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
        iter != history_info_map_.end(); ++iter) {
     HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
     map_entry->set_history_id(iter->first);
     const history::URLRow& url_row(iter->second.url_row);
     // Note: We only save information that contributes to the index so there
     // is no need to save search_term_cache_ (not persistent).
     map_entry->set_visit_count(url_row.visit_count());
     map_entry->set_typed_count(url_row.typed_count());
     map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
     map_entry->set_url(url_row.url().spec());
     map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
     const VisitInfoVector& visits(iter->second.visits);
     for (VisitInfoVector::const_iterator visit_iter = visits.begin();
          visit_iter != visits.end(); ++visit_iter) {
       HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
       visit_info->set_visit_time(visit_iter->first.ToInternalValue());
       visit_info->set_transition_type(visit_iter->second);
     }
   }
 }

 void URLIndexPrivateData::SaveWordStartsMap(
     InMemoryURLIndexCacheItem* cache) const {
   if (word_starts_map_.empty())
     return;
   // For unit testing: Enable saving of the cache as an earlier version to
   // allow testing of cache file upgrading in ReadFromFile().
   // TODO(mrossetti): Instead of intruding on production code with this kind of
   // test harness, save a copy of an older version cache with known results.
   // Implement this when switching the caching over to SQLite.
   if (saved_cache_version_ < 1)
     return;

   WordStartsMapItem* map_item = cache->mutable_word_starts_map();
   map_item->set_item_count(word_starts_map_.size());
   for (WordStartsMap::const_iterator iter = word_starts_map_.begin();
        iter != word_starts_map_.end(); ++iter) {
     WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
     map_entry->set_history_id(iter->first);
     const RowWordStarts& word_starts(iter->second);
     for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
          i != word_starts.url_word_starts_.end(); ++i)
       map_entry->add_url_word_starts(*i);
     for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
          i != word_starts.title_word_starts_.end(); ++i)
       map_entry->add_title_word_starts(*i);
   }
 }

 bool URLIndexPrivateData::RestorePrivateData(
     const InMemoryURLIndexCacheItem& cache) {
   last_time_rebuilt_from_history_ =
       base::Time::FromInternalValue(cache.last_rebuild_timestamp());
   const base::TimeDelta rebuilt_ago =
       base::Time::Now() - last_time_rebuilt_from_history_;
   if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
       (rebuilt_ago < base::TimeDelta::FromDays(-1))) {
     // Cache is more than a week old or, somehow, from some time in the future.
     // It's probably a good time to rebuild the index from history to
     // allow synced entries to now appear, expired entries to disappear, etc.
     // Allow one day in the future to make the cache not rebuild on simple
     // system clock changes such as time zone changes.
     return false;
   }
   if (cache.has_version()) {
     if (cache.version() < kCurrentCacheFileVersion) {
       // Don't try to restore an old format cache file.  (This will cause
       // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData
       // from history.)
       return false;
     }
     restored_cache_version_ = cache.version();
   }
   return RestoreWordList(cache) && RestoreWordMap(cache) &&
       RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
       RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
 }

 bool URLIndexPrivateData::RestoreWordList(
     const InMemoryURLIndexCacheItem& cache) {
   if (!cache.has_word_list())
     return false;
   const WordListItem& list_item(cache.word_list());
   uint32_t expected_item_count = list_item.word_count();
   uint32_t actual_item_count = list_item.word_size();
   if (actual_item_count == 0 || actual_item_count != expected_item_count)
     return false;
   const RepeatedPtrField<std::string>& words(list_item.word());
   for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
        iter != words.end(); ++iter)
     word_list_.push_back(base::UTF8ToUTF16(*iter));
   return true;
 }

 bool URLIndexPrivateData::RestoreWordMap(
     const InMemoryURLIndexCacheItem& cache) {
   if (!cache.has_word_map())
     return false;
   const WordMapItem& list_item(cache.word_map());
   uint32_t expected_item_count = list_item.item_count();
   uint32_t actual_item_count = list_item.word_map_entry_size();
   if (actual_item_count == 0 || actual_item_count != expected_item_count)
     return false;
   const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
   for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
        iter != entries.end(); ++iter)
     word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
   return true;
 }

 bool URLIndexPrivateData::RestoreCharWordMap(
     const InMemoryURLIndexCacheItem& cache) {
   if (!cache.has_char_word_map())
     return false;
   const CharWordMapItem& list_item(cache.char_word_map());
   uint32_t expected_item_count = list_item.item_count();
   uint32_t actual_item_count = list_item.char_word_map_entry_size();
   if (actual_item_count == 0 || actual_item_count != expected_item_count)
     return false;
   const RepeatedPtrField<CharWordMapEntry>&
       entries(list_item.char_word_map_entry());
   for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
        entries.begin(); iter != entries.end(); ++iter) {
     expected_item_count = iter->item_count();
     actual_item_count = iter->word_id_size();
     if (actual_item_count == 0 || actual_item_count != expected_item_count)
       return false;
     base::char16 uni_char = static_cast<base::char16>(iter->char_16());
     WordIDSet word_id_set;
     const RepeatedField<int32_t>& word_ids(iter->word_id());
     for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();
          jiter != word_ids.end(); ++jiter)
       word_id_set.insert(*jiter);
     char_word_map_[uni_char] = word_id_set;
   }
   return true;
 }

 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
     const InMemoryURLIndexCacheItem& cache) {
   if (!cache.has_word_id_history_map())
     return false;
   const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
   uint32_t expected_item_count = list_item.item_count();
   uint32_t actual_item_count = list_item.word_id_history_map_entry_size();
   if (actual_item_count == 0 || actual_item_count != expected_item_count)
     return false;
   const RepeatedPtrField<WordIDHistoryMapEntry>&
       entries(list_item.word_id_history_map_entry());
   for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
        entries.begin(); iter != entries.end(); ++iter) {
     expected_item_count = iter->item_count();
     actual_item_count = iter->history_id_size();
     if (actual_item_count == 0 || actual_item_count != expected_item_count)
       return false;
     WordID word_id = iter->word_id();
     HistoryIDSet history_id_set;
     const RepeatedField<int64_t>& history_ids(iter->history_id());
     for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin();
          jiter != history_ids.end(); ++jiter) {
       history_id_set.insert(*jiter);
       AddToHistoryIDWordMap(*jiter, word_id);
     }
     word_id_history_map_[word_id] = history_id_set;
   }
   return true;
 }

 bool URLIndexPrivateData::RestoreHistoryInfoMap(
     const InMemoryURLIndexCacheItem& cache) {
   if (!cache.has_history_info_map())
     return false;
   const HistoryInfoMapItem& list_item(cache.history_info_map());
   uint32_t expected_item_count = list_item.item_count();
   uint32_t actual_item_count = list_item.history_info_map_entry_size();
   if (actual_item_count == 0 || actual_item_count != expected_item_count)
     return false;
   const RepeatedPtrField<HistoryInfoMapEntry>&
       entries(list_item.history_info_map_entry());
   for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
        entries.begin(); iter != entries.end(); ++iter) {
     HistoryID history_id = iter->history_id();
     GURL url(iter->url());
     history::URLRow url_row(url, history_id);
     url_row.set_visit_count(iter->visit_count());
     url_row.set_typed_count(iter->typed_count());
     url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
     if (iter->has_title()) {
       base::string16 title(base::UTF8ToUTF16(iter->title()));
       url_row.set_title(title);
     }
     history_info_map_[history_id].url_row = url_row;

     // Restore visits list.
     VisitInfoVector visits;
     visits.reserve(iter->visits_size());
     for (int i = 0; i < iter->visits_size(); ++i) {
       visits.push_back(std::make_pair(
           base::Time::FromInternalValue(iter->visits(i).visit_time()),
           ui::PageTransitionFromInt(iter->visits(i).transition_type())));
     }
     history_info_map_[history_id].visits = visits;
   }
   return true;
 }

 bool URLIndexPrivateData::RestoreWordStartsMap(
     const InMemoryURLIndexCacheItem& cache) {
   // Note that this function must be called after RestoreHistoryInfoMap() has
   // been run as the word starts may have to be recalculated from the urls and
   // page titles.
   if (cache.has_word_starts_map()) {
     const WordStartsMapItem& list_item(cache.word_starts_map());
     uint32_t expected_item_count = list_item.item_count();
     uint32_t actual_item_count = list_item.word_starts_map_entry_size();
     if (actual_item_count == 0 || actual_item_count != expected_item_count)
       return false;
     const RepeatedPtrField<WordStartsMapEntry>&
         entries(list_item.word_starts_map_entry());
     for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
          entries.begin(); iter != entries.end(); ++iter) {
       HistoryID history_id = iter->history_id();
       RowWordStarts word_starts;
       // Restore the URL word starts.
       const RepeatedField<int32_t>& url_starts(iter->url_word_starts());
       for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin();
            jiter != url_starts.end(); ++jiter)
         word_starts.url_word_starts_.push_back(*jiter);
       // Restore the page title word starts.
       const RepeatedField<int32_t>& title_starts(iter->title_word_starts());
       for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin();
            jiter != title_starts.end(); ++jiter)
         word_starts.title_word_starts_.push_back(*jiter);
       word_starts_map_[history_id] = word_starts;
     }
   } else {
     // Since the cache did not contain any word starts we must rebuild then from
     // the URL and page titles.
     for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
          iter != history_info_map_.end(); ++iter) {
       RowWordStarts word_starts;
       const history::URLRow& row(iter->second.url_row);
       const base::string16& url =
           bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
       String16VectorFromString16(url, false, &word_starts.url_word_starts_);
       const base::string16& title =
           bookmarks::CleanUpTitleForMatching(row.title());
       String16VectorFromString16(title, false, &word_starts.title_word_starts_);
       word_starts_map_[iter->first] = word_starts;
     }
   }
   return true;
 }

 // static
 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
     const GURL& gurl,
     const std::set<std::string>& whitelist) {
   return whitelist.find(gurl.scheme()) != whitelist.end();
 }


 // SearchTermCacheItem ---------------------------------------------------------

 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
     const WordIDSet& word_id_set,
     const HistoryIDSet& history_id_set)
     : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {
 }

 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {
 }

 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
     const SearchTermCacheItem& other) = default;

 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {
 }

 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
     bookmarks::BookmarkModel* bookmark_model,
     TemplateURLService* template_url_service,
     const URLIndexPrivateData& private_data,
     const base::string16& lower_string,
     const String16Vector& lower_terms,
     const base::Time now)
     : bookmark_model_(bookmark_model),
       template_url_service_(template_url_service),
       private_data_(private_data),
       lower_string_(lower_string),
       lower_terms_(lower_terms),
       now_(now) {
   // Calculate offsets for each term.  For instance, the offset for
   // ".net" should be 1, indicating that the actual word-part of the term
   // starts at offset 1.
   lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);
   for (size_t i = 0; i < lower_terms_.size(); ++i) {
     base::i18n::BreakIterator iter(lower_terms_[i],
                                    base::i18n::BreakIterator::BREAK_WORD);
     // If the iterator doesn't work, assume an offset of 0.
     if (!iter.Init())
       continue;
     // Find the first word start. If the iterator didn't find a word break, set
     // an offset of term size. For example, the offset for "://" should be 3,
     // indicating that the word-part is missing.
     while (iter.Advance() && !iter.IsWord()) {}

     lower_terms_to_word_starts_offsets_[i] = iter.prev();
   }
 }

 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
     const AddHistoryMatch& other) = default;

 URLIndexPrivateData::AddHistoryMatch::~AddHistoryMatch() {
 }

 void URLIndexPrivateData::AddHistoryMatch::operator()(
     const HistoryID history_id) {
   HistoryInfoMap::const_iterator hist_pos =
       private_data_.history_info_map_.find(history_id);
   if (hist_pos == private_data_.history_info_map_.end())
     return;

   WordStartsMap::const_iterator starts_pos =
       private_data_.word_starts_map_.find(history_id);
   DCHECK(starts_pos != private_data_.word_starts_map_.end());

   const history::URLRow& hist_item = hist_pos->second.url_row;
   GURL url = hist_item.url();
   if (!url.is_valid())  // Possible in case of profile corruption.
     return;

   // Skip results corresponding to queries from the default search engine.
   // These are low-quality, difficult-to-understand matches for users.
   // SearchProvider should surface past queries in a better way.
   TemplateURL* template_url =
       template_url_service_ ? template_url_service_->GetDefaultSearchProvider()
                             : nullptr;
   if (template_url &&
       template_url->IsSearchURL(url,
                                 template_url_service_->search_terms_data()))
     return;

   const VisitInfoVector& visits = hist_pos->second.visits;
   ScoredHistoryMatch match(
       hist_item, visits, lower_string_, lower_terms_,
       lower_terms_to_word_starts_offsets_, starts_pos->second,
       bookmark_model_ && bookmark_model_->IsBookmarked(url), now_);
   if (match.raw_score > 0)
     scored_matches_.push_back(match);
 }


 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
     const HistoryInfoMap& history_info_map)
     : history_info_map_(history_info_map) {
 }

 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {
 }

 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
     const HistoryID h1,
     const HistoryID h2) {
   HistoryInfoMap::const_iterator entry1(history_info_map_.find(h1));
   if (entry1 == history_info_map_.end())
     return false;
   HistoryInfoMap::const_iterator entry2(history_info_map_.find(h2));
   if (entry2 == history_info_map_.end())
     return true;
   const history::URLRow& r1(entry1->second.url_row);
   const history::URLRow& r2(entry2->second.url_row);
   // First cut: typed count, visit count, recency.
   // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
   // recently visited (within the last 12/24 hours) as highly important. Get
   // input from mpearson.
   if (r1.typed_count() != r2.typed_count())
     return (r1.typed_count() > r2.typed_count());
   if (r1.visit_count() != r2.visit_count())
     return (r1.visit_count() > r2.visit_count());
   return (r1.last_visit() > r2.last_visit());
 }