[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "chrome/browser/devtools/devtools_file_system_indexer.h" |
| 6 | |
avi | e4d7b6f | 2015-12-26 00:59:18 | [diff] [blame] | 7 | #include <stddef.h> |
| 8 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 9 | #include <iterator> |
| 10 | |
| 11 | #include "base/bind.h" |
| 12 | #include "base/callback.h" |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 13 | #include "base/files/file_enumerator.h" |
thestig | 18dfb7a5 | 2014-08-26 10:44:04 | [diff] [blame] | 14 | #include "base/files/file_util.h" |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 15 | #include "base/lazy_instance.h" |
| 16 | #include "base/logging.h" |
avi | e4d7b6f | 2015-12-26 00:59:18 | [diff] [blame] | 17 | #include "base/macros.h" |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 18 | #include "base/sequence_checker.h" |
[email protected] | 09f3fde8 | 2014-05-14 15:08:15 | [diff] [blame] | 19 | #include "base/stl_util.h" |
zhongyi | 2396034 | 2016-04-12 23:13:20 | [diff] [blame] | 20 | #include "base/strings/string_util.h" |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 21 | #include "base/strings/utf_string_conversions.h" |
Gabriel Charette | 44db142 | 2018-08-06 11:19:33 | [diff] [blame] | 22 | #include "base/task/lazy_task_runner.h" |
| 23 | #include "base/task/post_task.h" |
Eric Seckler | 8652dcd5 | 2018-09-20 10:42:28 | [diff] [blame] | 24 | #include "content/public/browser/browser_task_traits.h" |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 25 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 26 | #include "content/public/browser/browser_thread.h" |
| 27 | |
| 28 | using base::Bind; |
| 29 | using base::Callback; |
| 30 | using base::FileEnumerator; |
| 31 | using base::FilePath; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 32 | using base::Time; |
| 33 | using base::TimeDelta; |
| 34 | using base::TimeTicks; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 35 | using content::BrowserThread; |
| 36 | using std::map; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 37 | using std::string; |
| 38 | using std::vector; |
| 39 | |
| 40 | namespace { |
| 41 | |
Maksim Sisov | 63c4dc41 | 2017-08-16 10:56:58 | [diff] [blame] | 42 | using std::set; |
| 43 | |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 44 | base::SequencedTaskRunner* impl_task_runner() { |
Gabriel Charette | b10aeebc | 2018-07-26 20:15:00 | [diff] [blame] | 45 | constexpr base::TaskTraits kBlockingTraits = { |
Sami Kyostila | c958045 | 2019-06-17 12:26:27 | [diff] [blame] | 46 | base::ThreadPool(), base::MayBlock(), base::TaskPriority::BEST_EFFORT}; |
Andrey Kosyakov | f9e21f7 | 2017-07-06 17:38:19 | [diff] [blame] | 47 | static base::LazySequencedTaskRunner s_sequenced_task_task_runner = |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 48 | LAZY_SEQUENCED_TASK_RUNNER_INITIALIZER(kBlockingTraits); |
Andrey Kosyakov | f9e21f7 | 2017-07-06 17:38:19 | [diff] [blame] | 49 | return s_sequenced_task_task_runner.Get().get(); |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 50 | } |
| 51 | |
// A trigram id: three trigram characters packed as a base-
// kTrigramCharacterCount number.
using Trigram = int32_t;
// A single normalized character of a trigram (or one of the sentinels below).
using TrigramChar = char;
// Compact identifier assigned to every indexed file.
using FileId = uint16_t;

// Minimum interval, in milliseconds, between two "worked" progress
// notifications.
constexpr int kMinTimeoutBetweenWorkedNitification = 200;
// Trigram characters include all ASCII printable characters (32-126) except
// for the capital letters, because the index is case insensitive.
constexpr size_t kTrigramCharacterCount =
    ('~' - ' ' + 1) - ('Z' - 'A' + 1);
// Number of distinct trigrams, i.e. the size of the index table.
constexpr size_t kTrigramCount =
    kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
// Number of bytes requested from a file per read.
constexpr int kMaxReadLength = 10 * 1024;
// Sentinel for a character that cannot be part of a trigram.
constexpr TrigramChar kUndefinedTrigramChar = -1;
// Sentinel for a character that marks the content as binary.
constexpr TrigramChar kBinaryTrigramChar = -2;
// Sentinel for a trigram that contains an undefined character.
constexpr Trigram kUndefinedTrigram = -1;
| 66 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 67 | class Index { |
| 68 | public: |
| 69 | Index(); |
raphael.kubo.da.costa | 65005e8 | 2016-11-16 11:17:26 | [diff] [blame] | 70 | // Index is only instantiated as a leak LazyInstance, so the destructor is |
| 71 | // never called. |
| 72 | ~Index() = delete; |
| 73 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 74 | Time LastModifiedTimeForFile(const FilePath& file_path); |
| 75 | void SetTrigramsForFile(const FilePath& file_path, |
| 76 | const vector<Trigram>& index, |
| 77 | const Time& time); |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 78 | vector<FilePath> Search(const string& query); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 79 | void NormalizeVectors(); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 80 | void Reset(); |
| 81 | void EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 82 | |
| 83 | private: |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 84 | FileId GetFileId(const FilePath& file_path); |
| 85 | |
| 86 | typedef map<FilePath, FileId> FileIdsMap; |
| 87 | FileIdsMap file_ids_; |
| 88 | FileId last_file_id_; |
| 89 | // The index in this vector is the trigram id. |
| 90 | vector<vector<FileId> > index_; |
| 91 | typedef map<FilePath, Time> IndexedFilesMap; |
| 92 | IndexedFilesMap index_times_; |
| 93 | vector<bool> is_normalized_; |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 94 | SEQUENCE_CHECKER(sequence_checker_); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 95 | |
| 96 | DISALLOW_COPY_AND_ASSIGN(Index); |
| 97 | }; |
| 98 | |
| 99 | base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER; |
| 100 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 101 | TrigramChar TrigramCharForChar(char c) { |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 102 | static TrigramChar* trigram_chars = nullptr; |
| 103 | if (!trigram_chars) { |
| 104 | trigram_chars = new TrigramChar[256]; |
| 105 | for (size_t i = 0; i < 256; ++i) { |
| 106 | if (i > 127) { |
| 107 | trigram_chars[i] = kUndefinedTrigramChar; |
| 108 | continue; |
| 109 | } |
| 110 | char ch = static_cast<char>(i); |
| 111 | if (ch == '\t') |
| 112 | ch = ' '; |
zhongyi | 2396034 | 2016-04-12 23:13:20 | [diff] [blame] | 113 | if (base::IsAsciiUpper(ch)) |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 114 | ch = ch - 'A' + 'a'; |
| 115 | |
| 116 | bool is_binary_char = ch < 9 || (ch >= 14 && ch < 32) || ch == 127; |
| 117 | if (is_binary_char) { |
| 118 | trigram_chars[i] = kBinaryTrigramChar; |
| 119 | continue; |
| 120 | } |
| 121 | |
| 122 | if (ch < ' ') { |
| 123 | trigram_chars[i] = kUndefinedTrigramChar; |
| 124 | continue; |
| 125 | } |
| 126 | |
| 127 | if (ch >= 'Z') |
| 128 | ch = ch - 'Z' - 1 + 'A'; |
| 129 | ch -= ' '; |
| 130 | char signed_trigram_count = static_cast<char>(kTrigramCharacterCount); |
| 131 | CHECK(ch >= 0 && ch < signed_trigram_count); |
| 132 | trigram_chars[i] = ch; |
| 133 | } |
| 134 | } |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 135 | unsigned char uc = static_cast<unsigned char>(c); |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 136 | return trigram_chars[uc]; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 137 | } |
| 138 | |
[email protected] | 8f830473 | 2013-08-05 22:25:52 | [diff] [blame] | 139 | Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 140 | static int kTrigramCharacterCountSquared = |
| 141 | kTrigramCharacterCount * kTrigramCharacterCount; |
| 142 | if (trigram_chars[index] == kUndefinedTrigramChar || |
| 143 | trigram_chars[index + 1] == kUndefinedTrigramChar || |
| 144 | trigram_chars[index + 2] == kUndefinedTrigramChar) |
| 145 | return kUndefinedTrigram; |
| 146 | Trigram trigram = kTrigramCharacterCountSquared * trigram_chars[index] + |
| 147 | kTrigramCharacterCount * trigram_chars[index + 1] + |
| 148 | trigram_chars[index + 2]; |
| 149 | return trigram; |
| 150 | } |
| 151 | |
| 152 | Index::Index() : last_file_id_(0) { |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 153 | Reset(); |
| 154 | } |
| 155 | |
| 156 | void Index::Reset() { |
| 157 | file_ids_.clear(); |
| 158 | index_.clear(); |
| 159 | index_times_.clear(); |
| 160 | is_normalized_.clear(); |
| 161 | last_file_id_ = 0; |
| 162 | } |
| 163 | |
| 164 | void Index::EnsureInitialized() { |
Zinovy Nis | dcc844d | 2019-02-28 07:11:29 | [diff] [blame] | 165 | if (!index_.empty()) |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 166 | return; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 167 | index_.resize(kTrigramCount); |
| 168 | is_normalized_.resize(kTrigramCount); |
| 169 | std::fill(is_normalized_.begin(), is_normalized_.end(), true); |
| 170 | } |
| 171 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 172 | Time Index::LastModifiedTimeForFile(const FilePath& file_path) { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 173 | DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 174 | EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 175 | Time last_modified_time; |
| 176 | if (index_times_.find(file_path) != index_times_.end()) |
| 177 | last_modified_time = index_times_[file_path]; |
| 178 | return last_modified_time; |
| 179 | } |
| 180 | |
| 181 | void Index::SetTrigramsForFile(const FilePath& file_path, |
| 182 | const vector<Trigram>& index, |
| 183 | const Time& time) { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 184 | DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 185 | EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 186 | FileId file_id = GetFileId(file_path); |
jdoerrie | c6fe63e | 2018-10-03 20:53:40 | [diff] [blame] | 187 | auto it = index.begin(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 188 | for (; it != index.end(); ++it) { |
| 189 | Trigram trigram = *it; |
| 190 | index_[trigram].push_back(file_id); |
| 191 | is_normalized_[trigram] = false; |
| 192 | } |
| 193 | index_times_[file_path] = time; |
| 194 | } |
| 195 | |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 196 | vector<FilePath> Index::Search(const string& query) { |
| 197 | DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 198 | EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 199 | const char* data = query.c_str(); |
| 200 | vector<TrigramChar> trigram_chars; |
| 201 | trigram_chars.reserve(query.size()); |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 202 | for (size_t i = 0; i < query.size(); ++i) { |
| 203 | TrigramChar trigram_char = TrigramCharForChar(data[i]); |
| 204 | if (trigram_char == kBinaryTrigramChar) |
| 205 | trigram_char = kUndefinedTrigramChar; |
| 206 | trigram_chars.push_back(trigram_char); |
| 207 | } |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 208 | vector<Trigram> trigrams; |
| 209 | for (size_t i = 0; i + 2 < query.size(); ++i) { |
| 210 | Trigram trigram = TrigramAtIndex(trigram_chars, i); |
| 211 | if (trigram != kUndefinedTrigram) |
| 212 | trigrams.push_back(trigram); |
| 213 | } |
| 214 | set<FileId> file_ids; |
| 215 | bool first = true; |
| 216 | vector<Trigram>::const_iterator it = trigrams.begin(); |
| 217 | for (; it != trigrams.end(); ++it) { |
| 218 | Trigram trigram = *it; |
| 219 | if (first) { |
| 220 | std::copy(index_[trigram].begin(), |
| 221 | index_[trigram].end(), |
| 222 | std::inserter(file_ids, file_ids.begin())); |
| 223 | first = false; |
| 224 | continue; |
| 225 | } |
[email protected] | 09f3fde8 | 2014-05-14 15:08:15 | [diff] [blame] | 226 | set<FileId> intersection = base::STLSetIntersection<set<FileId> >( |
| 227 | file_ids, index_[trigram]); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 228 | file_ids.swap(intersection); |
| 229 | } |
| 230 | vector<FilePath> result; |
| 231 | FileIdsMap::const_iterator ids_it = file_ids_.begin(); |
| 232 | for (; ids_it != file_ids_.end(); ++ids_it) { |
Zinovy Nis | dcc844d | 2019-02-28 07:11:29 | [diff] [blame] | 233 | if (trigrams.empty() || file_ids.find(ids_it->second) != file_ids.end()) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 234 | result.push_back(ids_it->first); |
| 235 | } |
| 236 | } |
| 237 | return result; |
| 238 | } |
| 239 | |
| 240 | FileId Index::GetFileId(const FilePath& file_path) { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 241 | DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 242 | EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 243 | string file_path_str = file_path.AsUTF8Unsafe(); |
| 244 | if (file_ids_.find(file_path) != file_ids_.end()) |
| 245 | return file_ids_[file_path]; |
| 246 | file_ids_[file_path] = ++last_file_id_; |
| 247 | return last_file_id_; |
| 248 | } |
| 249 | |
| 250 | void Index::NormalizeVectors() { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 251 | DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 252 | EnsureInitialized(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 253 | for (size_t i = 0; i < kTrigramCount; ++i) { |
| 254 | if (!is_normalized_[i]) { |
| 255 | std::sort(index_[i].begin(), index_[i].end()); |
| 256 | if (index_[i].capacity() > index_[i].size()) |
| 257 | vector<FileId>(index_[i]).swap(index_[i]); |
| 258 | is_normalized_[i] = true; |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 263 | typedef Callback<void(bool, const vector<bool>&)> IndexerCallback; |
| 264 | |
| 265 | } // namespace |
| 266 | |
| 267 | DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob( |
| 268 | const FilePath& file_system_path, |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 269 | const std::vector<base::FilePath>& excluded_folders, |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 270 | const TotalWorkCallback& total_work_callback, |
| 271 | const WorkedCallback& worked_callback, |
| 272 | const DoneCallback& done_callback) |
| 273 | : file_system_path_(file_system_path), |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 274 | excluded_folders_(excluded_folders), |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 275 | total_work_callback_(total_work_callback), |
| 276 | worked_callback_(worked_callback), |
| 277 | done_callback_(done_callback), |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 278 | files_indexed_(0), |
| 279 | stopped_(false) { |
| 280 | current_trigrams_set_.resize(kTrigramCount); |
| 281 | current_trigrams_.reserve(kTrigramCount); |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 282 | pending_folders_.push_back(file_system_path); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 283 | } |
| 284 | |
| 285 | DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() {} |
| 286 | |
| 287 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() { |
mostynb | 13260d5 | 2015-03-26 09:12:09 | [diff] [blame] | 288 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 289 | impl_task_runner()->PostTask( |
| 290 | FROM_HERE, BindOnce(&FileSystemIndexingJob::CollectFilesToIndex, this)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 291 | } |
| 292 | |
| 293 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() { |
mostynb | 13260d5 | 2015-03-26 09:12:09 | [diff] [blame] | 294 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 295 | impl_task_runner()->PostTask( |
| 296 | FROM_HERE, BindOnce(&FileSystemIndexingJob::StopOnImplSequence, this)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 297 | } |
| 298 | |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 299 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnImplSequence() { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 300 | stopped_ = true; |
| 301 | } |
| 302 | |
| 303 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 304 | DCHECK(impl_task_runner()->RunsTasksInCurrentSequence()); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 305 | if (stopped_) |
| 306 | return; |
| 307 | if (!file_enumerator_) { |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 308 | file_enumerator_.reset(new FileEnumerator( |
| 309 | pending_folders_.back(), false, |
| 310 | FileEnumerator::FILES | FileEnumerator::DIRECTORIES)); |
| 311 | pending_folders_.pop_back(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 312 | } |
| 313 | FilePath file_path = file_enumerator_->Next(); |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 314 | if (file_path.empty() && !pending_folders_.empty()) { |
| 315 | file_enumerator_.reset(new FileEnumerator( |
| 316 | pending_folders_.back(), false, |
| 317 | FileEnumerator::FILES | FileEnumerator::DIRECTORIES)); |
| 318 | pending_folders_.pop_back(); |
| 319 | impl_task_runner()->PostTask( |
| 320 | FROM_HERE, BindOnce(&FileSystemIndexingJob::CollectFilesToIndex, this)); |
| 321 | return; |
| 322 | } |
| 323 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 324 | if (file_path.empty()) { |
Sami Kyostila | 4ba007d | 2019-08-14 12:03:14 | [diff] [blame] | 325 | base::PostTask(FROM_HERE, {BrowserThread::UI}, |
| 326 | BindOnce(total_work_callback_, file_path_times_.size())); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 327 | indexing_it_ = file_path_times_.begin(); |
| 328 | IndexFiles(); |
| 329 | return; |
| 330 | } |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 331 | if (file_enumerator_->GetInfo().IsDirectory()) { |
| 332 | bool excluded = false; |
| 333 | for (const FilePath& excluded_folder : excluded_folders_) { |
| 334 | excluded = excluded_folder.IsParent(file_path); |
| 335 | if (excluded) |
| 336 | break; |
| 337 | } |
| 338 | if (!excluded) |
| 339 | pending_folders_.push_back(file_path); |
| 340 | impl_task_runner()->PostTask( |
| 341 | FROM_HERE, BindOnce(&FileSystemIndexingJob::CollectFilesToIndex, this)); |
| 342 | return; |
| 343 | } |
| 344 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 345 | Time saved_last_modified_time = |
| 346 | g_trigram_index.Get().LastModifiedTimeForFile(file_path); |
| 347 | FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo(); |
| 348 | Time current_last_modified_time = file_info.GetLastModifiedTime(); |
| 349 | if (current_last_modified_time > saved_last_modified_time) { |
| 350 | file_path_times_[file_path] = current_last_modified_time; |
| 351 | } |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 352 | impl_task_runner()->PostTask( |
| 353 | FROM_HERE, BindOnce(&FileSystemIndexingJob::CollectFilesToIndex, this)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 354 | } |
| 355 | |
| 356 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 357 | DCHECK(impl_task_runner()->RunsTasksInCurrentSequence()); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 358 | if (stopped_) |
| 359 | return; |
| 360 | if (indexing_it_ == file_path_times_.end()) { |
| 361 | g_trigram_index.Get().NormalizeVectors(); |
Sami Kyostila | 4ba007d | 2019-08-14 12:03:14 | [diff] [blame] | 362 | base::PostTask(FROM_HERE, {BrowserThread::UI}, done_callback_); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 363 | return; |
| 364 | } |
| 365 | FilePath file_path = indexing_it_->first; |
Andrey Kosyakov | c1f83c1e | 2017-06-27 00:28:13 | [diff] [blame] | 366 | current_file_.Initialize(file_path, |
| 367 | base::File::FLAG_OPEN | base::File::FLAG_READ); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 368 | |
[email protected] | bda135f | 2014-04-10 21:55:06 | [diff] [blame] | 369 | if (!current_file_.IsValid()) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 370 | FinishFileIndexing(false); |
| 371 | return; |
| 372 | } |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 373 | current_file_offset_ = 0; |
| 374 | current_trigrams_.clear(); |
| 375 | std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(), false); |
| 376 | ReadFromFile(); |
| 377 | } |
| 378 | |
| 379 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() { |
| 380 | if (stopped_) { |
| 381 | CloseFile(); |
| 382 | return; |
| 383 | } |
Andrey Kosyakov | c1f83c1e | 2017-06-27 00:28:13 | [diff] [blame] | 384 | std::unique_ptr<char[]> data_ptr(new char[kMaxReadLength]); |
| 385 | const char* const data = data_ptr.get(); |
| 386 | int bytes_read = |
| 387 | current_file_.Read(current_file_offset_, data_ptr.get(), kMaxReadLength); |
| 388 | if (bytes_read < 0) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 389 | FinishFileIndexing(false); |
| 390 | return; |
| 391 | } |
| 392 | |
Andrey Kosyakov | c1f83c1e | 2017-06-27 00:28:13 | [diff] [blame] | 393 | if (bytes_read < 3) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 394 | FinishFileIndexing(true); |
| 395 | return; |
| 396 | } |
| 397 | |
| 398 | size_t size = static_cast<size_t>(bytes_read); |
| 399 | vector<TrigramChar> trigram_chars; |
| 400 | trigram_chars.reserve(size); |
| 401 | for (size_t i = 0; i < size; ++i) { |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 402 | TrigramChar trigram_char = TrigramCharForChar(data[i]); |
| 403 | if (trigram_char == kBinaryTrigramChar) { |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 404 | current_trigrams_.clear(); |
| 405 | FinishFileIndexing(true); |
| 406 | return; |
| 407 | } |
vsevik | 3ef1c9d | 2014-10-23 14:17:35 | [diff] [blame] | 408 | trigram_chars.push_back(trigram_char); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 409 | } |
| 410 | |
| 411 | for (size_t i = 0; i + 2 < size; ++i) { |
| 412 | Trigram trigram = TrigramAtIndex(trigram_chars, i); |
| 413 | if ((trigram != kUndefinedTrigram) && !current_trigrams_set_[trigram]) { |
| 414 | current_trigrams_set_[trigram] = true; |
| 415 | current_trigrams_.push_back(trigram); |
| 416 | } |
| 417 | } |
| 418 | current_file_offset_ += bytes_read - 2; |
Andrey Kosyakov | c1f83c1e | 2017-06-27 00:28:13 | [diff] [blame] | 419 | impl_task_runner()->PostTask( |
| 420 | FROM_HERE, base::BindOnce(&FileSystemIndexingJob::ReadFromFile, this)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 421 | } |
| 422 | |
| 423 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing( |
| 424 | bool success) { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 425 | DCHECK(impl_task_runner()->RunsTasksInCurrentSequence()); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 426 | CloseFile(); |
| 427 | if (success) { |
| 428 | FilePath file_path = indexing_it_->first; |
| 429 | g_trigram_index.Get().SetTrigramsForFile( |
| 430 | file_path, current_trigrams_, file_path_times_[file_path]); |
| 431 | } |
| 432 | ReportWorked(); |
| 433 | ++indexing_it_; |
Andrey Kosyakov | f9e21f7 | 2017-07-06 17:38:19 | [diff] [blame] | 434 | impl_task_runner()->PostTask( |
| 435 | FROM_HERE, base::BindOnce(&FileSystemIndexingJob::IndexFiles, this)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 436 | } |
| 437 | |
| 438 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() { |
[email protected] | bda135f | 2014-04-10 21:55:06 | [diff] [blame] | 439 | if (current_file_.IsValid()) |
Andrey Kosyakov | c1f83c1e | 2017-06-27 00:28:13 | [diff] [blame] | 440 | current_file_.Close(); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 441 | } |
| 442 | |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 443 | void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() { |
| 444 | TimeTicks current_time = TimeTicks::Now(); |
| 445 | bool should_send_worked_nitification = true; |
| 446 | if (!last_worked_notification_time_.is_null()) { |
| 447 | TimeDelta delta = current_time - last_worked_notification_time_; |
| 448 | if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNitification) |
| 449 | should_send_worked_nitification = false; |
| 450 | } |
| 451 | ++files_indexed_; |
| 452 | if (should_send_worked_nitification) { |
| 453 | last_worked_notification_time_ = current_time; |
Sami Kyostila | 4ba007d | 2019-08-14 12:03:14 | [diff] [blame] | 454 | base::PostTask(FROM_HERE, {BrowserThread::UI}, |
| 455 | BindOnce(worked_callback_, files_indexed_)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 456 | files_indexed_ = 0; |
| 457 | } |
| 458 | } |
| 459 | |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 460 | static int g_instance_count = 0; |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 461 | |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 462 | DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() { |
Andrey Lushnikov | 6986cc2 | 2018-03-21 22:06:28 | [diff] [blame] | 463 | impl_task_runner()->PostTask(FROM_HERE, |
| 464 | base::BindOnce([]() { ++g_instance_count; })); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 465 | } |
| 466 | |
| 467 | DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() { |
Andrey Lushnikov | 6986cc2 | 2018-03-21 22:06:28 | [diff] [blame] | 468 | impl_task_runner()->PostTask(FROM_HERE, base::BindOnce([]() { |
| 469 | --g_instance_count; |
| 470 | if (!g_instance_count) |
| 471 | g_trigram_index.Get().Reset(); |
| 472 | })); |
Andrey Lushnikov | 5955e51 | 2018-03-20 22:19:05 | [diff] [blame] | 473 | } |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 474 | |
| 475 | scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob> |
| 476 | DevToolsFileSystemIndexer::IndexPath( |
| 477 | const string& file_system_path, |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 478 | const vector<string>& excluded_folders, |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 479 | const TotalWorkCallback& total_work_callback, |
| 480 | const WorkedCallback& worked_callback, |
| 481 | const DoneCallback& done_callback) { |
mostynb | 13260d5 | 2015-03-26 09:12:09 | [diff] [blame] | 482 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Andrey Lushnikov | 00787886 | 2018-03-21 19:12:00 | [diff] [blame] | 483 | vector<base::FilePath> paths; |
| 484 | for (const string& path : excluded_folders) { |
| 485 | paths.push_back(FilePath::FromUTF8Unsafe(path)); |
| 486 | } |
| 487 | scoped_refptr<FileSystemIndexingJob> indexing_job = new FileSystemIndexingJob( |
| 488 | FilePath::FromUTF8Unsafe(file_system_path), paths, total_work_callback, |
| 489 | worked_callback, done_callback); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 490 | indexing_job->Start(); |
| 491 | return indexing_job; |
| 492 | } |
| 493 | |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 494 | void DevToolsFileSystemIndexer::SearchInPath( |
| 495 | const std::string& file_system_path, |
| 496 | const std::string& query, |
| 497 | const SearchCallback& callback) { |
mostynb | 13260d5 | 2015-03-26 09:12:09 | [diff] [blame] | 498 | DCHECK_CURRENTLY_ON(BrowserThread::UI); |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 499 | impl_task_runner()->PostTask( |
| 500 | FROM_HERE, |
| 501 | BindOnce(&DevToolsFileSystemIndexer::SearchInPathOnImplSequence, this, |
tzik | 93bf8a7 | 2017-04-24 18:53:30 | [diff] [blame] | 502 | file_system_path, query, callback)); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 503 | } |
| 504 | |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 505 | void DevToolsFileSystemIndexer::SearchInPathOnImplSequence( |
| 506 | const std::string& file_system_path, |
| 507 | const std::string& query, |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 508 | const SearchCallback& callback) { |
Andrey Kosyakov | 6d214b21 | 2017-06-23 22:47:08 | [diff] [blame] | 509 | DCHECK(impl_task_runner()->RunsTasksInCurrentSequence()); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 510 | vector<FilePath> file_paths = g_trigram_index.Get().Search(query); |
| 511 | vector<string> result; |
| 512 | FilePath path = FilePath::FromUTF8Unsafe(file_system_path); |
| 513 | vector<FilePath>::const_iterator it = file_paths.begin(); |
| 514 | for (; it != file_paths.end(); ++it) { |
| 515 | if (path.IsParent(*it)) |
| 516 | result.push_back(it->AsUTF8Unsafe()); |
| 517 | } |
Sami Kyostila | 4ba007d | 2019-08-14 12:03:14 | [diff] [blame] | 518 | base::PostTask(FROM_HERE, {BrowserThread::UI}, |
| 519 | BindOnce(callback, std::move(result))); |
[email protected] | 200bd33 | 2013-08-05 16:19:11 | [diff] [blame] | 520 | } |