blob: 01a81b06f0c8bf551a70b3bd5ec141a8704735ef [file] [log] [blame]
Avi Drissman8ba1bad2022-09-13 19:22:361// Copyright 2017 The Chromium Authors
Etienne Pierre-Doray160839132017-08-11 01:22:352// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Samuel Huang577ef6c2018-03-13 18:19:345#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
6#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
Etienne Pierre-Doray160839132017-08-11 01:22:357
8#include <stddef.h>
9
Etienne Pierre-doray7abe67c2021-10-29 14:12:2310#include <deque>
Etienne Pierre-Doray160839132017-08-11 01:22:3511#include <limits>
12#include <vector>
13
Samuel Huang577ef6c2018-03-13 18:19:3414#include "components/zucchini/image_index.h"
15#include "components/zucchini/image_utils.h"
16#include "components/zucchini/targets_affinity.h"
Etienne Pierre-Doray160839132017-08-11 01:22:3517
18namespace zucchini {
19
20constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
21
Etienne Pierre-Doray01319562017-12-30 20:53:3322class EncodedView;
Etienne Pierre-Doray78754aa2018-03-07 16:26:5223class EquivalenceSource;
Etienne Pierre-Doray01319562017-12-30 20:53:3324
Etienne Pierre-Dorayefe28342018-01-09 13:47:5325// Returns similarity score between a token (raw byte or first byte of a
26// reference) in |old_image_index| at |src| and a token in |new_image_index|
27// at |dst|. |targets_affinities| describes affinities for each target pool and
28// is used to evaluate similarity between references, hence it's size must be
29// equal to the number of pools in both |old_image_index| and |new_image_index|.
30// Both |src| and |dst| must refer to tokens in |old_image_index| and
31// |new_image_index|.
32double GetTokenSimilarity(
33 const ImageIndex& old_image_index,
34 const ImageIndex& new_image_index,
35 const std::vector<TargetsAffinity>& targets_affinities,
36 offset_t src,
37 offset_t dst);
Etienne Pierre-Doray160839132017-08-11 01:22:3538
Etienne Pierre-Doray01319562017-12-30 20:53:3339// Returns a similarity score between content in |old_image_index| and
Etienne Pierre-Dorayefe28342018-01-09 13:47:5340// |new_image_index| at regions described by |equivalence|, using
41// |targets_affinities| to evaluate similarity between references.
42double GetEquivalenceSimilarity(
43 const ImageIndex& old_image_index,
44 const ImageIndex& new_image_index,
45 const std::vector<TargetsAffinity>& targets_affinities,
46 const Equivalence& equivalence);
Etienne Pierre-Doray160839132017-08-11 01:22:3547
48// Extends |equivalence| forward and returns the result. This is related to
49// VisitEquivalenceSeed().
50EquivalenceCandidate ExtendEquivalenceForward(
Etienne Pierre-Doray01319562017-12-30 20:53:3351 const ImageIndex& old_image_index,
52 const ImageIndex& new_image_index,
Etienne Pierre-Dorayefe28342018-01-09 13:47:5353 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:3554 const EquivalenceCandidate& equivalence,
55 double min_similarity);
56
57// Extends |equivalence| backward and returns the result. This is related to
58// VisitEquivalenceSeed().
59EquivalenceCandidate ExtendEquivalenceBackward(
Etienne Pierre-Doray01319562017-12-30 20:53:3360 const ImageIndex& old_image_index,
61 const ImageIndex& new_image_index,
Etienne Pierre-Dorayefe28342018-01-09 13:47:5362 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:3563 const EquivalenceCandidate& equivalence,
64 double min_similarity);
65
66// Creates an equivalence, starting with |src| and |dst| as offset hint, and
67// extends it both forward and backward, trying to maximise similarity between
Etienne Pierre-Doray01319562017-12-30 20:53:3368// |old_image_index| and |new_image_index|, and returns the result.
Etienne Pierre-Dorayefe28342018-01-09 13:47:5369// |targets_affinities| is used to evaluate similarity between references.
Etienne Pierre-Doray01319562017-12-30 20:53:3370// |min_similarity| describes the minimum acceptable similarity score and is
71// used as threshold to discard bad equivalences.
Etienne Pierre-Dorayefe28342018-01-09 13:47:5372EquivalenceCandidate VisitEquivalenceSeed(
73 const ImageIndex& old_image_index,
74 const ImageIndex& new_image_index,
75 const std::vector<TargetsAffinity>& targets_affinities,
76 offset_t src,
77 offset_t dst,
78 double min_similarity);
Etienne Pierre-Doray160839132017-08-11 01:22:3579
Etienne Pierre-Doray78754aa2018-03-07 16:26:5280// Container of pruned equivalences used to map offsets from |old_image| to
81// offsets in |new_image|. Equivalences are pruned by cropping smaller
82// equivalences to avoid overlaps, to make the equivalence map (for covered
83// bytes in |old_image| and |new_image|) one-to-one.
84class OffsetMapper {
85 public:
Etienne Pierre-doray7abe67c2021-10-29 14:12:2386 using const_iterator = std::deque<Equivalence>::const_iterator;
Etienne Pierre-Doray78754aa2018-03-07 16:26:5287
Samuel Huangad7a5c02018-06-26 14:47:0288 // Constructors for various data sources. "Old" and "new" image sizes are
89 // needed for bounds checks and to handle dangling targets.
Etienne Pierre-Doray78754aa2018-03-07 16:26:5290 // - From a list of |equivalences|, already sorted (by |src_offset|) and
91 // pruned, useful for tests.
Etienne Pierre-doray7abe67c2021-10-29 14:12:2392 OffsetMapper(std::deque<Equivalence>&& equivalences,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:3393 offset_t old_image_size,
94 offset_t new_image_size);
Etienne Pierre-Doray78754aa2018-03-07 16:26:5295 // - From a generator, useful for Zucchini-apply.
Samuel Huangad7a5c02018-06-26 14:47:0296 OffsetMapper(EquivalenceSource&& equivalence_source,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:3397 offset_t old_image_size,
98 offset_t new_image_size);
Etienne Pierre-Doray78754aa2018-03-07 16:26:5299 // - From an EquivalenceMap that needs to be processed, useful for
100 // Zucchini-gen.
Samuel Huangad7a5c02018-06-26 14:47:02101 OffsetMapper(const EquivalenceMap& equivalence_map,
Etienne Pierre-doray5946dbfa2018-09-10 16:19:33102 offset_t old_image_size,
103 offset_t new_image_size);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52104 ~OffsetMapper();
105
106 size_t size() const { return equivalences_.size(); }
107 const_iterator begin() const { return equivalences_.begin(); }
108 const_iterator end() const { return equivalences_.end(); }
109
Samuel Huangad7a5c02018-06-26 14:47:02110 // Returns naive extended forward-projection of "old" |offset| that follows
111 // |eq|'s delta. |eq| needs not cover |offset|.
112 // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
113 // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
114 // caller must to make the check (hence "naive").
115 offset_t NaiveExtendedForwardProject(const Equivalence& unit,
116 offset_t offset) const;
117
Etienne Pierre-Doray78754aa2018-03-07 16:26:52118 // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
Samuel Huangad7a5c02018-06-26 14:47:02119 // Assumes |equivalences_| to be non-empty. Cases:
120 // - If |offset| is covered (i.e., in an "old" block), then use the delta of
121 // the (unique) equivalence unit that covers |offset|.
122 // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
123 // nearest "old" block, use its delta, and avert underflow / overflow by
124 // clamping the result to |[0, new_image_size_)|.
125 // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
126 // |new_image_size_ - old_image_size_| as the delta.
127 offset_t ExtendedForwardProject(offset_t offset) const;
Etienne Pierre-Doray78754aa2018-03-07 16:26:52128
129 // Given sorted |offsets|, applies a projection in-place of all offsets that
130 // are part of a pruned equivalence from |old_image| to |new_image|. Other
131 // offsets are removed from |offsets|.
Etienne Pierre-doray380557e2021-09-14 17:31:51132 void ForwardProjectAll(std::deque<offset_t>* offsets) const;
Etienne Pierre-Doray78754aa2018-03-07 16:26:52133
134 // Accessor for testing.
Etienne Pierre-doray7abe67c2021-10-29 14:12:23135 const std::deque<Equivalence> equivalences() const { return equivalences_; }
Etienne Pierre-Doray78754aa2018-03-07 16:26:52136
137 // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
138 // source location that was covered by some Equivalence would become covered
139 // by exactly one Equivalence. Moreover, for the offset, the equivalence
140 // corresponds to the largest (pre-pruning) covering Equivalence, and in case
141 // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
142 // change in size since empty Equivalences are removed.
143 static void PruneEquivalencesAndSortBySource(
Etienne Pierre-doray7abe67c2021-10-29 14:12:23144 std::deque<Equivalence>* equivalences);
Etienne Pierre-Doray78754aa2018-03-07 16:26:52145
146 private:
Samuel Huangad7a5c02018-06-26 14:47:02147 // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
148 // block overlaps). Also, it is sorted by "old" offsets.
Etienne Pierre-doray7abe67c2021-10-29 14:12:23149 std::deque<Equivalence> equivalences_;
Etienne Pierre-doray5946dbfa2018-09-10 16:19:33150 const offset_t old_image_size_;
151 const offset_t new_image_size_;
Etienne Pierre-Doray78754aa2018-03-07 16:26:52152};
153
Etienne Pierre-Doray01319562017-12-30 20:53:33154// Container of equivalences between |old_image_index| and |new_image_index|,
155// sorted by |Equivalence::dst_offset|, only used during patch generation.
Etienne Pierre-Doray160839132017-08-11 01:22:35156class EquivalenceMap {
157 public:
158 using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
159
160 EquivalenceMap();
Etienne Pierre-Dorayf5e4fa82017-08-15 17:58:18161 // Initializes the object with |equivalences|.
Etienne Pierre-Doray78754aa2018-03-07 16:26:52162 explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
Etienne Pierre-Doray65b0a03a2017-08-18 17:24:02163 EquivalenceMap(EquivalenceMap&&);
164 EquivalenceMap(const EquivalenceMap&) = delete;
Etienne Pierre-Doray160839132017-08-11 01:22:35165 ~EquivalenceMap();
166
Etienne Pierre-Dorayefe28342018-01-09 13:47:53167 // Finds relevant equivalences between |old_view| and |new_view|, using
168 // suffix array |old_sa| computed from |old_view| and using
169 // |targets_affinities| to evaluate similarity between references. This
170 // function is not symmetric. Equivalences might overlap in |old_view|, but
171 // not in |new_view|. It tries to maximize accumulated similarity within each
172 // equivalence, while maximizing |new_view| coverage. The minimum similarity
173 // of an equivalence is given by |min_similarity|.
Etienne Pierre-Doray160839132017-08-11 01:22:35174 void Build(const std::vector<offset_t>& old_sa,
Etienne Pierre-Doray01319562017-12-30 20:53:33175 const EncodedView& old_view,
176 const EncodedView& new_view,
Etienne Pierre-Dorayefe28342018-01-09 13:47:53177 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:35178 double min_similarity);
179
180 size_t size() const { return candidates_.size(); }
181 const_iterator begin() const { return candidates_.begin(); }
182 const_iterator end() const { return candidates_.end(); }
183
184 private:
Etienne Pierre-Doray01319562017-12-30 20:53:33185 // Discovers equivalence candidates between |old_view| and |new_view| and
Etienne Pierre-Doray160839132017-08-11 01:22:35186 // stores them in the object. Note that resulting candidates are not sorted
187 // and might be overlapping in new image.
188 void CreateCandidates(const std::vector<offset_t>& old_sa,
Etienne Pierre-Doray01319562017-12-30 20:53:33189 const EncodedView& old_view,
190 const EncodedView& new_view,
Etienne Pierre-Dorayefe28342018-01-09 13:47:53191 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:35192 double min_similarity);
193 // Sorts candidates by their offset in new image.
194 void SortByDestination();
195 // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
196 // overlaps. Candidates with low similarity scores are more likely to be
197 // shrunken. Unfit candidates may be removed.
Etienne Pierre-Doray01319562017-12-30 20:53:33198 void Prune(const EncodedView& old_view,
199 const EncodedView& new_view,
Etienne Pierre-Dorayefe28342018-01-09 13:47:53200 const std::vector<TargetsAffinity>& targets_affinities,
Etienne Pierre-Doray160839132017-08-11 01:22:35201 double min_similarity);
202
203 std::vector<EquivalenceCandidate> candidates_;
Etienne Pierre-Doray160839132017-08-11 01:22:35204};
205
206} // namespace zucchini
207
Samuel Huang577ef6c2018-03-13 18:19:34208#endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_