blob: 8583e6772917b4435a730d4abcd44f60546cb128 [file] [log] [blame]
Etienne Pierre-Doray160839132017-08-11 01:22:351// Copyright 2017 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Samuel Huang577ef6c2018-03-13 18:19:345#include "components/zucchini/equivalence_map.h"
Etienne Pierre-Doray160839132017-08-11 01:22:356
7#include <algorithm>
Samuel Huang577ef6c2018-03-13 18:19:348#include <utility>
Etienne Pierre-Doray160839132017-08-11 01:22:359
10#include "base/logging.h"
Samuel Huangad7a5c02018-06-26 14:47:0211#include "base/numerics/safe_conversions.h"
Jdragona248d5c2018-08-24 12:46:4212#include "base/stl_util.h"
Samuel Huang577ef6c2018-03-13 18:19:3413#include "components/zucchini/encoded_view.h"
14#include "components/zucchini/patch_reader.h"
15#include "components/zucchini/suffix_array.h"
Etienne Pierre-Doray160839132017-08-11 01:22:3516
17namespace zucchini {
18
Calder Kitagawa94722d4e2018-06-28 15:32:1619namespace {
20
21// TODO(haungs): Tune these numbers to improve pathological case results.
22
23// In pathological cases Zucchini can exhibit O(n^2) behavior if the seed
24// selection process runs to completion. To prevent this we impose a quota for
25// the total length of equivalences the seed selection process can perform
26// trials on. For regular use cases it is unlikely this quota will be exceeded,
27// and if it is the effects on patch size are expected to be small.
28constexpr uint64_t kSeedSelectionTotalVisitLengthQuota = 1 << 18; // 256 KiB
29
30// The aforementioned quota alone is insufficient, as exploring backwards will
31// still be very successful resulting in O(n) behavior in the case of a limited
32// seed selection trials. This results in O(n^2) behavior returning. To mitigate
33// this we also impose a cap on the ExtendEquivalenceBackward() exploration.
34constexpr offset_t kBackwardsExtendLimit = 1 << 16; // 64 KiB
35
36} // namespace
37
Etienne Pierre-Doray160839132017-08-11 01:22:3538/******** Utility Functions ********/
39
Etienne Pierre-Dorayefe28342018-01-09 13:47:5340double GetTokenSimilarity(
41 const ImageIndex& old_image_index,
42 const ImageIndex& new_image_index,
43 const std::vector<TargetsAffinity>& targets_affinities,
44 offset_t src,
45 offset_t dst) {
Etienne Pierre-Doray01319562017-12-30 20:53:3346 DCHECK(old_image_index.IsToken(src));
47 DCHECK(new_image_index.IsToken(dst));
Etienne Pierre-Doray160839132017-08-11 01:22:3548
Etienne Pierre-Doray01319562017-12-30 20:53:3349 TypeTag old_type = old_image_index.LookupType(src);
50 TypeTag new_type = new_image_index.LookupType(dst);
Etienne Pierre-Doray160839132017-08-11 01:22:3551 if (old_type != new_type)
52 return kMismatchFatal;
53
54 // Raw comparison.
Etienne Pierre-Doray01319562017-12-30 20:53:3355 if (!old_image_index.IsReference(src) && !new_image_index.IsReference(dst)) {
56 return old_image_index.GetRawValue(src) == new_image_index.GetRawValue(dst)
57 ? 1.0
58 : -1.5;
Etienne Pierre-Doray160839132017-08-11 01:22:3559 }
60
Etienne Pierre-Doray01319562017-12-30 20:53:3361 const ReferenceSet& old_ref_set = old_image_index.refs(old_type);
62 const ReferenceSet& new_ref_set = new_image_index.refs(new_type);
Etienne Pierre-doray0434f5b2018-08-13 18:49:0063 Reference old_reference = old_ref_set.at(src);
64 Reference new_reference = new_ref_set.at(dst);
Etienne Pierre-Dorayefe28342018-01-09 13:47:5365 PoolTag pool_tag = old_ref_set.pool_tag();
Etienne Pierre-Doray01319562017-12-30 20:53:3366
Etienne Pierre-Dorayefe28342018-01-09 13:47:5367 double affinity = targets_affinities[pool_tag.value()].AffinityBetween(
Etienne Pierre-doray0434f5b2018-08-13 18:49:0068 old_ref_set.target_pool().KeyForOffset(old_reference.target),
69 new_ref_set.target_pool().KeyForOffset(new_reference.target));
Etienne Pierre-Doray160839132017-08-11 01:22:3570
71 // Both targets are not associated, which implies a weak match.
Etienne Pierre-Dorayefe28342018-01-09 13:47:5372 if (affinity == 0.0)
Etienne Pierre-Doray01319562017-12-30 20:53:3373 return 0.5 * old_ref_set.width();
Etienne Pierre-Doray160839132017-08-11 01:22:3574
75 // At least one target is associated, so values are compared.
Etienne Pierre-Dorayefe28342018-01-09 13:47:5376 return affinity > 0.0 ? old_ref_set.width() : -2.0;
Etienne Pierre-Doray160839132017-08-11 01:22:3577}
78
Etienne Pierre-Dorayefe28342018-01-09 13:47:5379double GetEquivalenceSimilarity(
80 const ImageIndex& old_image_index,
81 const ImageIndex& new_image_index,
82 const std::vector<TargetsAffinity>& targets_affinities,
83 const Equivalence& equivalence) {
Etienne Pierre-Doray160839132017-08-11 01:22:3584 double similarity = 0.0;
85 for (offset_t k = 0; k < equivalence.length; ++k) {
86 // Non-tokens are joined with the nearest previous token: skip until we
87 // cover the unit.
Etienne Pierre-Doray01319562017-12-30 20:53:3388 if (!new_image_index.IsToken(equivalence.dst_offset + k))
Etienne Pierre-Doray160839132017-08-11 01:22:3589 continue;
90
Etienne Pierre-Dorayefe28342018-01-09 13:47:5391 similarity += GetTokenSimilarity(
92 old_image_index, new_image_index, targets_affinities,
93 equivalence.src_offset + k, equivalence.dst_offset + k);
Etienne Pierre-Doray160839132017-08-11 01:22:3594 if (similarity == kMismatchFatal)
95 return kMismatchFatal;
96 }
97 return similarity;
98}
99
100EquivalenceCandidate ExtendEquivalenceForward(
Etienne Pierre-Doray01319562017-12-30 20:53:33101 const ImageIndex& old_image_index,
102 const ImageIndex& new_image_index,
Etienne Pierre-Dorayefe28342018-01-09 13:47:53103 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:35104 const EquivalenceCandidate& candidate,
105 double min_similarity) {
106 Equivalence equivalence = candidate.eq;
107 offset_t best_k = equivalence.length;
108 double current_similarity = candidate.similarity;
109 double best_similarity = current_similarity;
110 double current_penalty = min_similarity;
Etienne Pierre-Doray01319562017-12-30 20:53:33111 for (offset_t k = best_k;
112 equivalence.src_offset + k < old_image_index.size() &&
113 equivalence.dst_offset + k < new_image_index.size();
Etienne Pierre-Doray160839132017-08-11 01:22:35114 ++k) {
115 // Mismatch in type, |candidate| cannot be extended further.
Etienne Pierre-Doray01319562017-12-30 20:53:33116 if (old_image_index.LookupType(equivalence.src_offset + k) !=
117 new_image_index.LookupType(equivalence.dst_offset + k)) {
Etienne Pierre-Doray160839132017-08-11 01:22:35118 break;
Etienne Pierre-Doraya846f682017-08-31 20:30:47119 }
Etienne Pierre-Doray160839132017-08-11 01:22:35120
Etienne Pierre-Doray01319562017-12-30 20:53:33121 if (!new_image_index.IsToken(equivalence.dst_offset + k)) {
Etienne Pierre-Doray160839132017-08-11 01:22:35122 // Non-tokens are joined with the nearest previous token: skip until we
123 // cover the unit, and extend |best_k| if applicable.
124 if (best_k == k)
125 best_k = k + 1;
126 continue;
127 }
128
Etienne Pierre-Dorayefe28342018-01-09 13:47:53129 double similarity = GetTokenSimilarity(
130 old_image_index, new_image_index, targets_affinities,
131 equivalence.src_offset + k, equivalence.dst_offset + k);
Etienne Pierre-Doray160839132017-08-11 01:22:35132 current_similarity += similarity;
133 current_penalty = std::max(0.0, current_penalty) - similarity;
134
135 if (current_similarity < 0.0 || current_penalty >= min_similarity)
136 break;
137 if (current_similarity >= best_similarity) {
138 best_similarity = current_similarity;
139 best_k = k + 1;
140 }
141 }
142 equivalence.length = best_k;
143 return {equivalence, best_similarity};
144}
145
146EquivalenceCandidate ExtendEquivalenceBackward(
Etienne Pierre-Doray01319562017-12-30 20:53:33147 const ImageIndex& old_image_index,
148 const ImageIndex& new_image_index,
Etienne Pierre-Dorayefe28342018-01-09 13:47:53149 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:35150 const EquivalenceCandidate& candidate,
151 double min_similarity) {
152 Equivalence equivalence = candidate.eq;
153 offset_t best_k = 0;
154 double current_similarity = candidate.similarity;
155 double best_similarity = current_similarity;
156 double current_penalty = 0.0;
Calder Kitagawa94722d4e2018-06-28 15:32:16157 offset_t k_min = std::min(
158 {equivalence.dst_offset, equivalence.src_offset, kBackwardsExtendLimit});
159 for (offset_t k = 1; k <= k_min; ++k) {
Etienne Pierre-Doray160839132017-08-11 01:22:35160 // Mismatch in type, |candidate| cannot be extended further.
Etienne Pierre-Doray01319562017-12-30 20:53:33161 if (old_image_index.LookupType(equivalence.src_offset - k) !=
162 new_image_index.LookupType(equivalence.dst_offset - k)) {
Etienne Pierre-Doray160839132017-08-11 01:22:35163 break;
Etienne Pierre-Doraya846f682017-08-31 20:30:47164 }
Etienne Pierre-Doray160839132017-08-11 01:22:35165
166 // Non-tokens are joined with the nearest previous token: skip until we
167 // reach the next token.
Etienne Pierre-Doray01319562017-12-30 20:53:33168 if (!new_image_index.IsToken(equivalence.dst_offset - k))
Etienne Pierre-Doray160839132017-08-11 01:22:35169 continue;
170
Etienne Pierre-Doray01319562017-12-30 20:53:33171 DCHECK_EQ(old_image_index.LookupType(equivalence.src_offset - k),
172 new_image_index.LookupType(equivalence.dst_offset -
173 k)); // Sanity check.
Etienne Pierre-Dorayefe28342018-01-09 13:47:53174 double similarity = GetTokenSimilarity(
175 old_image_index, new_image_index, targets_affinities,
176 equivalence.src_offset - k, equivalence.dst_offset - k);
177
Etienne Pierre-Doray160839132017-08-11 01:22:35178 current_similarity += similarity;
179 current_penalty = std::max(0.0, current_penalty) - similarity;
180
181 if (current_similarity < 0.0 || current_penalty >= min_similarity)
182 break;
183 if (current_similarity >= best_similarity) {
184 best_similarity = current_similarity;
185 best_k = k;
186 }
187 }
188
189 equivalence.dst_offset -= best_k;
190 equivalence.src_offset -= best_k;
191 equivalence.length += best_k;
192 return {equivalence, best_similarity};
193}
194
Etienne Pierre-Dorayefe28342018-01-09 13:47:53195EquivalenceCandidate VisitEquivalenceSeed(
196 const ImageIndex& old_image_index,
197 const ImageIndex& new_image_index,
198 const std::vector<TargetsAffinity>& targets_affinities,
199 offset_t src,
200 offset_t dst,
201 double min_similarity) {
Etienne Pierre-Doray160839132017-08-11 01:22:35202 EquivalenceCandidate candidate{{src, dst, 0}, 0.0}; // Empty.
Etienne Pierre-Dorayefe28342018-01-09 13:47:53203 if (!old_image_index.IsToken(src))
204 return candidate;
205 candidate =
206 ExtendEquivalenceForward(old_image_index, new_image_index,
207 targets_affinities, candidate, min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:35208 if (candidate.similarity < min_similarity)
209 return candidate; // Not worth exploring any more.
Etienne Pierre-Dorayefe28342018-01-09 13:47:53210 return ExtendEquivalenceBackward(old_image_index, new_image_index,
211 targets_affinities, candidate,
Etienne Pierre-Doray160839132017-08-11 01:22:35212 min_similarity);
213}
214
Etienne Pierre-Doray78754aa2018-03-07 16:26:52215/******** OffsetMapper ********/
216
Samuel Huangad7a5c02018-06-26 14:47:02217OffsetMapper::OffsetMapper(std::vector<Equivalence>&& equivalences,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:33218 offset_t old_image_size,
219 offset_t new_image_size)
Samuel Huangad7a5c02018-06-26 14:47:02220 : equivalences_(std::move(equivalences)),
221 old_image_size_(old_image_size),
222 new_image_size_(new_image_size) {
223 DCHECK_GT(new_image_size_, 0U);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52224 DCHECK(std::is_sorted(equivalences_.begin(), equivalences_.end(),
225 [](const Equivalence& a, const Equivalence& b) {
226 return a.src_offset < b.src_offset;
227 }));
Samuel Huangad7a5c02018-06-26 14:47:02228 // This is for testing. Assume pruned.
Etienne Pierre-Doray78754aa2018-03-07 16:26:52229}
230
Samuel Huangad7a5c02018-06-26 14:47:02231OffsetMapper::OffsetMapper(EquivalenceSource&& equivalence_source,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:33232 offset_t old_image_size,
233 offset_t new_image_size)
Samuel Huangad7a5c02018-06-26 14:47:02234 : old_image_size_(old_image_size), new_image_size_(new_image_size) {
235 DCHECK_GT(new_image_size_, 0U);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52236 for (auto e = equivalence_source.GetNext(); e.has_value();
237 e = equivalence_source.GetNext()) {
238 equivalences_.push_back(*e);
239 }
240 PruneEquivalencesAndSortBySource(&equivalences_);
241}
242
Samuel Huangad7a5c02018-06-26 14:47:02243OffsetMapper::OffsetMapper(const EquivalenceMap& equivalence_map,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:33244 offset_t old_image_size,
245 offset_t new_image_size)
Samuel Huangad7a5c02018-06-26 14:47:02246 : equivalences_(equivalence_map.size()),
247 old_image_size_(old_image_size),
248 new_image_size_(new_image_size) {
249 DCHECK_GT(new_image_size_, 0U);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52250 std::transform(equivalence_map.begin(), equivalence_map.end(),
251 equivalences_.begin(),
252 [](const EquivalenceCandidate& c) { return c.eq; });
253 PruneEquivalencesAndSortBySource(&equivalences_);
254}
255
256OffsetMapper::~OffsetMapper() = default;
257
Samuel Huangad7a5c02018-06-26 14:47:02258// Safely evaluates |offset - unit.src_offset + unit.dst_offset| with signed
259// arithmetic, then clips the result to |[0, new_image_size_)|.
260offset_t OffsetMapper::NaiveExtendedForwardProject(const Equivalence& unit,
261 offset_t offset) const {
262 int64_t old_offset64 = offset;
263 int64_t src_offset64 = unit.src_offset;
264 int64_t dst_offset64 = unit.dst_offset;
265 uint64_t new_offset64 = std::min<uint64_t>(
266 std::max<int64_t>(0LL, old_offset64 - src_offset64 + dst_offset64),
267 new_image_size_ - 1);
268 return base::checked_cast<offset_t>(new_offset64);
269}
270
271offset_t OffsetMapper::ExtendedForwardProject(offset_t offset) const {
272 DCHECK(!equivalences_.empty());
273 if (offset < old_image_size_) {
274 // Finds the equivalence unit whose "old" block is nearest to |offset|,
275 // favoring the block with lower offset in case of a tie.
276 auto pos = std::upper_bound(
277 equivalences_.begin(), equivalences_.end(), offset,
278 [](offset_t a, const Equivalence& b) { return a < b.src_offset; });
279 // For tiebreaking: |offset - pos[-1].src_end()| is actually 1 less than
280 // |offset|'s distance to "old" block of |pos[-1]|. Therefore "<" is used.
281 if (pos != equivalences_.begin() &&
282 (pos == equivalences_.end() || offset < pos[-1].src_end() ||
283 offset - pos[-1].src_end() < pos->src_offset - offset)) {
Etienne Pierre-Doray78754aa2018-03-07 16:26:52284 --pos;
285 }
Samuel Huangad7a5c02018-06-26 14:47:02286 return NaiveExtendedForwardProject(*pos, offset);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52287 }
Samuel Huangad7a5c02018-06-26 14:47:02288 // Fake offsets.
289 offset_t delta = offset - old_image_size_;
290 return delta < kOffsetBound - new_image_size_ ? new_image_size_ + delta
291 : kOffsetBound - 1;
Etienne Pierre-Doray78754aa2018-03-07 16:26:52292}
293
294void OffsetMapper::ForwardProjectAll(std::vector<offset_t>* offsets) const {
295 DCHECK(std::is_sorted(offsets->begin(), offsets->end()));
296 auto current = equivalences_.begin();
297 for (auto& src : *offsets) {
298 while (current != end() && current->src_end() <= src) {
299 ++current;
300 }
301
302 if (current != end() && current->src_offset <= src) {
303 src = src - current->src_offset + current->dst_offset;
304 } else {
305 src = kInvalidOffset;
306 }
307 }
Jdragona248d5c2018-08-24 12:46:42308 base::Erase(*offsets, kInvalidOffset);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52309 offsets->shrink_to_fit();
310}
311
312void OffsetMapper::PruneEquivalencesAndSortBySource(
313 std::vector<Equivalence>* equivalences) {
314 std::sort(equivalences->begin(), equivalences->end(),
315 [](const Equivalence& a, const Equivalence& b) {
316 return a.src_offset < b.src_offset;
317 });
318
319 for (auto current = equivalences->begin(); current != equivalences->end();
320 ++current) {
321 // A "reaper" is an equivalence after |current| that overlaps with it, but
322 // is longer, and so truncates |current|. For example:
323 // ****** <= |current|
324 // **
325 // ****
326 // ****
327 // ********** <= |next| as reaper.
328 // If a reaper is found (as |next|), every equivalence strictly between
329 // |current| and |next| would be truncated to 0 and discarded. Handling this
330 // case is important to avoid O(n^2) behavior.
331 bool next_is_reaper = false;
332
333 // Look ahead to resolve overlaps, until a better candidate is found.
334 auto next = current + 1;
335 for (; next != equivalences->end(); ++next) {
336 DCHECK_GE(next->src_offset, current->src_offset);
337 if (next->src_offset >= current->src_end())
338 break; // No more overlap.
339
340 if (current->length < next->length) {
341 // |next| is better: So it is a reaper that shrinks |current|.
342 offset_t delta = current->src_end() - next->src_offset;
343 current->length -= delta;
344 next_is_reaper = true;
345 break;
346 }
347 }
348
349 if (next_is_reaper) {
350 // Discard all equivalences strictly between |cur| and |next|.
351 for (auto reduced = current + 1; reduced != next; ++reduced)
352 reduced->length = 0;
353 current = next - 1;
354 } else {
355 // Shrink all equivalences that overlap with |current|. These are all
356 // worse than |current| since no reaper is found.
357 for (auto reduced = current + 1; reduced != next; ++reduced) {
Calder Kitagawabdc237142018-03-09 19:08:41358 offset_t delta = current->src_end() - reduced->src_offset;
359 reduced->length -= std::min(reduced->length, delta);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52360 reduced->src_offset += delta;
361 reduced->dst_offset += delta;
362 DCHECK_EQ(reduced->src_offset, current->src_end());
363 }
364 }
365 }
366
367 // Discard all equivalences with length == 0.
Jdragona248d5c2018-08-24 12:46:42368 base::EraseIf(*equivalences, [](const Equivalence& equivalence) {
369 return equivalence.length == 0;
370 });
Etienne Pierre-Doray78754aa2018-03-07 16:26:52371}
372
Etienne Pierre-Doray160839132017-08-11 01:22:35373/******** EquivalenceMap ********/
374
375EquivalenceMap::EquivalenceMap() = default;
Etienne Pierre-Dorayf5e4fa82017-08-15 17:58:18376
Etienne Pierre-Doray78754aa2018-03-07 16:26:52377EquivalenceMap::EquivalenceMap(std::vector<EquivalenceCandidate>&& equivalences)
378 : candidates_(std::move(equivalences)) {
Etienne Pierre-Dorayf5e4fa82017-08-15 17:58:18379 SortByDestination();
380}
381
Etienne Pierre-Doray65b0a03a2017-08-18 17:24:02382EquivalenceMap::EquivalenceMap(EquivalenceMap&&) = default;
383
Etienne Pierre-Doray160839132017-08-11 01:22:35384EquivalenceMap::~EquivalenceMap() = default;
385
Etienne Pierre-Dorayefe28342018-01-09 13:47:53386void EquivalenceMap::Build(
387 const std::vector<offset_t>& old_sa,
388 const EncodedView& old_view,
389 const EncodedView& new_view,
390 const std::vector<TargetsAffinity>& targets_affinities,
391 double min_similarity) {
Etienne Pierre-Doray01319562017-12-30 20:53:33392 DCHECK_EQ(old_sa.size(), old_view.size());
Etienne Pierre-Doray160839132017-08-11 01:22:35393
Etienne Pierre-Dorayefe28342018-01-09 13:47:53394 CreateCandidates(old_sa, old_view, new_view, targets_affinities,
395 min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:35396 SortByDestination();
Etienne Pierre-Dorayefe28342018-01-09 13:47:53397 Prune(old_view, new_view, targets_affinities, min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:35398
399 offset_t coverage = 0;
400 offset_t current_offset = 0;
401 for (auto candidate : candidates_) {
402 DCHECK_GE(candidate.eq.dst_offset, current_offset);
403 coverage += candidate.eq.length;
404 current_offset = candidate.eq.dst_end();
405 }
406 LOG(INFO) << "Equivalence Count: " << size();
407 LOG(INFO) << "Coverage / Extra / Total: " << coverage << " / "
Etienne Pierre-Doray01319562017-12-30 20:53:33408 << new_view.size() - coverage << " / " << new_view.size();
Etienne Pierre-Doray160839132017-08-11 01:22:35409}
410
Etienne Pierre-Dorayefe28342018-01-09 13:47:53411void EquivalenceMap::CreateCandidates(
412 const std::vector<offset_t>& old_sa,
413 const EncodedView& old_view,
414 const EncodedView& new_view,
415 const std::vector<TargetsAffinity>& targets_affinities,
416 double min_similarity) {
Etienne Pierre-Doray160839132017-08-11 01:22:35417 candidates_.clear();
418
419 // This is an heuristic to find 'good' equivalences on encoded views.
420 // Equivalences are found in ascending order of |new_image|.
Etienne Pierre-Doray160839132017-08-11 01:22:35421 offset_t dst_offset = 0;
422
Etienne Pierre-Doray01319562017-12-30 20:53:33423 while (dst_offset < new_view.size()) {
424 if (!new_view.IsToken(dst_offset)) {
Etienne Pierre-Doray160839132017-08-11 01:22:35425 ++dst_offset;
426 continue;
427 }
428 auto match =
429 SuffixLowerBound(old_sa, old_view.begin(),
430 new_view.begin() + dst_offset, new_view.end());
431
432 offset_t next_dst_offset = dst_offset + 1;
433 // TODO(huangs): Clean up.
434 double best_similarity = min_similarity;
Calder Kitagawa94722d4e2018-06-28 15:32:16435 uint64_t total_visit_length = 0;
Etienne Pierre-Doray160839132017-08-11 01:22:35436 EquivalenceCandidate best_candidate = {{0, 0, 0}, 0.0};
437 for (auto it = match; it != old_sa.end(); ++it) {
Etienne Pierre-Doray01319562017-12-30 20:53:33438 EquivalenceCandidate candidate = VisitEquivalenceSeed(
Etienne Pierre-Dorayefe28342018-01-09 13:47:53439 old_view.image_index(), new_view.image_index(), targets_affinities,
Etienne Pierre-Doray01319562017-12-30 20:53:33440 static_cast<offset_t>(*it), dst_offset, min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:35441 if (candidate.similarity > best_similarity) {
442 best_candidate = candidate;
443 best_similarity = candidate.similarity;
444 next_dst_offset = candidate.eq.dst_end();
Calder Kitagawa94722d4e2018-06-28 15:32:16445 total_visit_length += candidate.eq.length;
446 if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
447 break;
448 }
Etienne Pierre-Doray160839132017-08-11 01:22:35449 } else {
450 break;
451 }
452 }
Calder Kitagawa94722d4e2018-06-28 15:32:16453 total_visit_length = 0;
Etienne Pierre-Doray160839132017-08-11 01:22:35454 for (auto it = match; it != old_sa.begin(); --it) {
455 EquivalenceCandidate candidate = VisitEquivalenceSeed(
Etienne Pierre-Dorayefe28342018-01-09 13:47:53456 old_view.image_index(), new_view.image_index(), targets_affinities,
Etienne Pierre-Doray01319562017-12-30 20:53:33457 static_cast<offset_t>(it[-1]), dst_offset, min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:35458 if (candidate.similarity > best_similarity) {
459 best_candidate = candidate;
460 best_similarity = candidate.similarity;
461 next_dst_offset = candidate.eq.dst_end();
Calder Kitagawa94722d4e2018-06-28 15:32:16462 total_visit_length += candidate.eq.length;
463 if (total_visit_length > kSeedSelectionTotalVisitLengthQuota) {
464 break;
465 }
Etienne Pierre-Doray160839132017-08-11 01:22:35466 } else {
467 break;
468 }
469 }
470 if (best_candidate.similarity >= min_similarity) {
471 candidates_.push_back(best_candidate);
472 }
473
474 dst_offset = next_dst_offset;
475 }
476}
477
478void EquivalenceMap::SortByDestination() {
479 std::sort(candidates_.begin(), candidates_.end(),
480 [](const EquivalenceCandidate& a, const EquivalenceCandidate& b) {
481 return a.eq.dst_offset < b.eq.dst_offset;
482 });
483}
484
Etienne Pierre-Dorayefe28342018-01-09 13:47:53485void EquivalenceMap::Prune(
486 const EncodedView& old_view,
487 const EncodedView& new_view,
488 const std::vector<TargetsAffinity>& target_affinities,
489 double min_similarity) {
Etienne Pierre-Doray78754aa2018-03-07 16:26:52490 // TODO(etiennep): unify with
491 // OffsetMapper::PruneEquivalencesAndSortBySource().
Etienne Pierre-Doray160839132017-08-11 01:22:35492 for (auto current = candidates_.begin(); current != candidates_.end();
493 ++current) {
494 if (current->similarity < min_similarity)
495 continue; // This candidate will be discarded anyways.
496
Etienne Pierre-Doray78754aa2018-03-07 16:26:52497 bool next_is_reaper = false;
498
Etienne Pierre-Doray160839132017-08-11 01:22:35499 // Look ahead to resolve overlaps, until a better candidate is found.
Etienne Pierre-Doray78754aa2018-03-07 16:26:52500 auto next = current + 1;
501 for (; next != candidates_.end(); ++next) {
Etienne Pierre-Doray160839132017-08-11 01:22:35502 DCHECK_GE(next->eq.dst_offset, current->eq.dst_offset);
503 if (next->eq.dst_offset >= current->eq.dst_offset + current->eq.length)
504 break; // No more overlap.
505
Etienne Pierre-Doray160839132017-08-11 01:22:35506 if (current->similarity < next->similarity) {
Etienne Pierre-Doray78754aa2018-03-07 16:26:52507 // |next| is better: So it is a reaper that shrinks |current|.
508 offset_t delta = current->eq.dst_end() - next->eq.dst_offset;
Etienne Pierre-Doray160839132017-08-11 01:22:35509 current->eq.length -= delta;
Etienne Pierre-Doray01319562017-12-30 20:53:33510 current->similarity = GetEquivalenceSimilarity(
Etienne Pierre-Dorayefe28342018-01-09 13:47:53511 old_view.image_index(), new_view.image_index(), target_affinities,
512 current->eq);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52513
514 next_is_reaper = true;
Etienne Pierre-Doray160839132017-08-11 01:22:35515 break;
516 }
517 }
518
Etienne Pierre-Doray78754aa2018-03-07 16:26:52519 if (next_is_reaper) {
520 // Discard all equivalences strictly between |cur| and |next|.
521 for (auto reduced = current + 1; reduced != next; ++reduced) {
522 reduced->eq.length = 0;
523 reduced->similarity = 0;
524 }
525 current = next - 1;
526 } else {
527 // Shrinks all overlapping candidates following and worse than |current|.
528 for (auto reduced = current + 1; reduced != next; ++reduced) {
Calder Kitagawabdc237142018-03-09 19:08:41529 offset_t delta = current->eq.dst_end() - reduced->eq.dst_offset;
530 reduced->eq.length -= std::min(reduced->eq.length, delta);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52531 reduced->eq.src_offset += delta;
532 reduced->eq.dst_offset += delta;
533 reduced->similarity = GetEquivalenceSimilarity(
534 old_view.image_index(), new_view.image_index(), target_affinities,
535 reduced->eq);
536 DCHECK_EQ(reduced->eq.dst_offset, current->eq.dst_end());
537 }
Etienne Pierre-Doray160839132017-08-11 01:22:35538 }
539 }
540
541 // Discard all candidates with similarity smaller than |min_similarity|.
Jdragona248d5c2018-08-24 12:46:42542 base::EraseIf(candidates_,
543 [min_similarity](const EquivalenceCandidate& candidate) {
544 return candidate.similarity < min_similarity;
545 });
Etienne Pierre-Doray160839132017-08-11 01:22:35546}
547
548} // namespace zucchini