Avi Drissman | 8ba1bad | 2022-09-13 19:22:36 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 5 | #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 7 | |
| 8 | #include <stddef.h> |
| 9 | |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 10 | #include <deque> |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 11 | #include <limits> |
| 12 | #include <vector> |
| 13 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 14 | #include "components/zucchini/image_index.h" |
| 15 | #include "components/zucchini/image_utils.h" |
| 16 | #include "components/zucchini/targets_affinity.h" |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 17 | |
| 18 | namespace zucchini { |
| 19 | |
| 20 | constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity(); |
| 21 | |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 22 | class EncodedView; |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 23 | class EquivalenceSource; |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 24 | |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 25 | // Returns similarity score between a token (raw byte or first byte of a |
| 26 | // reference) in |old_image_index| at |src| and a token in |new_image_index| |
| 27 | // at |dst|. |targets_affinities| describes affinities for each target pool and |
| 28 | // is used to evaluate similarity between references, hence it's size must be |
| 29 | // equal to the number of pools in both |old_image_index| and |new_image_index|. |
| 30 | // Both |src| and |dst| must refer to tokens in |old_image_index| and |
| 31 | // |new_image_index|. |
| 32 | double GetTokenSimilarity( |
| 33 | const ImageIndex& old_image_index, |
| 34 | const ImageIndex& new_image_index, |
| 35 | const std::vector<TargetsAffinity>& targets_affinities, |
| 36 | offset_t src, |
| 37 | offset_t dst); |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 38 | |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 39 | // Returns a similarity score between content in |old_image_index| and |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 40 | // |new_image_index| at regions described by |equivalence|, using |
| 41 | // |targets_affinities| to evaluate similarity between references. |
| 42 | double GetEquivalenceSimilarity( |
| 43 | const ImageIndex& old_image_index, |
| 44 | const ImageIndex& new_image_index, |
| 45 | const std::vector<TargetsAffinity>& targets_affinities, |
| 46 | const Equivalence& equivalence); |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 47 | |
| 48 | // Extends |equivalence| forward and returns the result. This is related to |
| 49 | // VisitEquivalenceSeed(). |
| 50 | EquivalenceCandidate ExtendEquivalenceForward( |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 51 | const ImageIndex& old_image_index, |
| 52 | const ImageIndex& new_image_index, |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 53 | const std::vector<TargetsAffinity>& targets_affinities, |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 54 | const EquivalenceCandidate& equivalence, |
| 55 | double min_similarity); |
| 56 | |
| 57 | // Extends |equivalence| backward and returns the result. This is related to |
| 58 | // VisitEquivalenceSeed(). |
| 59 | EquivalenceCandidate ExtendEquivalenceBackward( |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 60 | const ImageIndex& old_image_index, |
| 61 | const ImageIndex& new_image_index, |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 62 | const std::vector<TargetsAffinity>& targets_affinities, |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 63 | const EquivalenceCandidate& equivalence, |
| 64 | double min_similarity); |
| 65 | |
| 66 | // Creates an equivalence, starting with |src| and |dst| as offset hint, and |
| 67 | // extends it both forward and backward, trying to maximise similarity between |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 68 | // |old_image_index| and |new_image_index|, and returns the result. |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 69 | // |targets_affinities| is used to evaluate similarity between references. |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 70 | // |min_similarity| describes the minimum acceptable similarity score and is |
| 71 | // used as threshold to discard bad equivalences. |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 72 | EquivalenceCandidate VisitEquivalenceSeed( |
| 73 | const ImageIndex& old_image_index, |
| 74 | const ImageIndex& new_image_index, |
| 75 | const std::vector<TargetsAffinity>& targets_affinities, |
| 76 | offset_t src, |
| 77 | offset_t dst, |
| 78 | double min_similarity); |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 79 | |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 80 | // Container of pruned equivalences used to map offsets from |old_image| to |
| 81 | // offsets in |new_image|. Equivalences are pruned by cropping smaller |
| 82 | // equivalences to avoid overlaps, to make the equivalence map (for covered |
| 83 | // bytes in |old_image| and |new_image|) one-to-one. |
| 84 | class OffsetMapper { |
| 85 | public: |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 86 | using const_iterator = std::deque<Equivalence>::const_iterator; |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 87 | |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 88 | // Constructors for various data sources. "Old" and "new" image sizes are |
| 89 | // needed for bounds checks and to handle dangling targets. |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 90 | // - From a list of |equivalences|, already sorted (by |src_offset|) and |
| 91 | // pruned, useful for tests. |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 92 | OffsetMapper(std::deque<Equivalence>&& equivalences, |
Etienne Pierre-doray | 5946dbfa | 2018-09-10 16:19:33 | [diff] [blame] | 93 | offset_t old_image_size, |
| 94 | offset_t new_image_size); |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 95 | // - From a generator, useful for Zucchini-apply. |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 96 | OffsetMapper(EquivalenceSource&& equivalence_source, |
Etienne Pierre-doray | 5946dbfa | 2018-09-10 16:19:33 | [diff] [blame] | 97 | offset_t old_image_size, |
| 98 | offset_t new_image_size); |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 99 | // - From an EquivalenceMap that needs to be processed, useful for |
| 100 | // Zucchini-gen. |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 101 | OffsetMapper(const EquivalenceMap& equivalence_map, |
Etienne Pierre-doray | 5946dbfa | 2018-09-10 16:19:33 | [diff] [blame] | 102 | offset_t old_image_size, |
| 103 | offset_t new_image_size); |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 104 | ~OffsetMapper(); |
| 105 | |
| 106 | size_t size() const { return equivalences_.size(); } |
| 107 | const_iterator begin() const { return equivalences_.begin(); } |
| 108 | const_iterator end() const { return equivalences_.end(); } |
| 109 | |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 110 | // Returns naive extended forward-projection of "old" |offset| that follows |
| 111 | // |eq|'s delta. |eq| needs not cover |offset|. |
| 112 | // - Averts underflow / overflow by clamping to |[0, new_image_size_)|. |
| 113 | // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the |
| 114 | // caller must to make the check (hence "naive"). |
| 115 | offset_t NaiveExtendedForwardProject(const Equivalence& unit, |
| 116 | offset_t offset) const; |
| 117 | |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 118 | // Returns an offset in |new_image| corresponding to |offset| in |old_image|. |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 119 | // Assumes |equivalences_| to be non-empty. Cases: |
| 120 | // - If |offset| is covered (i.e., in an "old" block), then use the delta of |
| 121 | // the (unique) equivalence unit that covers |offset|. |
| 122 | // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the |
| 123 | // nearest "old" block, use its delta, and avert underflow / overflow by |
| 124 | // clamping the result to |[0, new_image_size_)|. |
| 125 | // - If |offset| is >= |new_image_size_| (a "fake offset"), then use |
| 126 | // |new_image_size_ - old_image_size_| as the delta. |
| 127 | offset_t ExtendedForwardProject(offset_t offset) const; |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 128 | |
| 129 | // Given sorted |offsets|, applies a projection in-place of all offsets that |
| 130 | // are part of a pruned equivalence from |old_image| to |new_image|. Other |
| 131 | // offsets are removed from |offsets|. |
Etienne Pierre-doray | 380557e | 2021-09-14 17:31:51 | [diff] [blame] | 132 | void ForwardProjectAll(std::deque<offset_t>* offsets) const; |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 133 | |
| 134 | // Accessor for testing. |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 135 | const std::deque<Equivalence> equivalences() const { return equivalences_; } |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 136 | |
| 137 | // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a |
| 138 | // source location that was covered by some Equivalence would become covered |
| 139 | // by exactly one Equivalence. Moreover, for the offset, the equivalence |
| 140 | // corresponds to the largest (pre-pruning) covering Equivalence, and in case |
| 141 | // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may |
| 142 | // change in size since empty Equivalences are removed. |
| 143 | static void PruneEquivalencesAndSortBySource( |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 144 | std::deque<Equivalence>* equivalences); |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 145 | |
| 146 | private: |
Samuel Huang | ad7a5c0 | 2018-06-26 14:47:02 | [diff] [blame] | 147 | // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new" |
| 148 | // block overlaps). Also, it is sorted by "old" offsets. |
Etienne Pierre-doray | 7abe67c | 2021-10-29 14:12:23 | [diff] [blame] | 149 | std::deque<Equivalence> equivalences_; |
Etienne Pierre-doray | 5946dbfa | 2018-09-10 16:19:33 | [diff] [blame] | 150 | const offset_t old_image_size_; |
| 151 | const offset_t new_image_size_; |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 152 | }; |
| 153 | |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 154 | // Container of equivalences between |old_image_index| and |new_image_index|, |
| 155 | // sorted by |Equivalence::dst_offset|, only used during patch generation. |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 156 | class EquivalenceMap { |
| 157 | public: |
| 158 | using const_iterator = std::vector<EquivalenceCandidate>::const_iterator; |
| 159 | |
| 160 | EquivalenceMap(); |
Etienne Pierre-Doray | f5e4fa8 | 2017-08-15 17:58:18 | [diff] [blame] | 161 | // Initializes the object with |equivalences|. |
Etienne Pierre-Doray | 78754aa | 2018-03-07 16:26:52 | [diff] [blame] | 162 | explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates); |
Etienne Pierre-Doray | 65b0a03a | 2017-08-18 17:24:02 | [diff] [blame] | 163 | EquivalenceMap(EquivalenceMap&&); |
| 164 | EquivalenceMap(const EquivalenceMap&) = delete; |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 165 | ~EquivalenceMap(); |
| 166 | |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 167 | // Finds relevant equivalences between |old_view| and |new_view|, using |
| 168 | // suffix array |old_sa| computed from |old_view| and using |
| 169 | // |targets_affinities| to evaluate similarity between references. This |
| 170 | // function is not symmetric. Equivalences might overlap in |old_view|, but |
| 171 | // not in |new_view|. It tries to maximize accumulated similarity within each |
| 172 | // equivalence, while maximizing |new_view| coverage. The minimum similarity |
| 173 | // of an equivalence is given by |min_similarity|. |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 174 | void Build(const std::vector<offset_t>& old_sa, |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 175 | const EncodedView& old_view, |
| 176 | const EncodedView& new_view, |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 177 | const std::vector<TargetsAffinity>& targets_affinities, |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 178 | double min_similarity); |
| 179 | |
| 180 | size_t size() const { return candidates_.size(); } |
| 181 | const_iterator begin() const { return candidates_.begin(); } |
| 182 | const_iterator end() const { return candidates_.end(); } |
| 183 | |
| 184 | private: |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 185 | // Discovers equivalence candidates between |old_view| and |new_view| and |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 186 | // stores them in the object. Note that resulting candidates are not sorted |
| 187 | // and might be overlapping in new image. |
| 188 | void CreateCandidates(const std::vector<offset_t>& old_sa, |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 189 | const EncodedView& old_view, |
| 190 | const EncodedView& new_view, |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 191 | const std::vector<TargetsAffinity>& targets_affinities, |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 192 | double min_similarity); |
| 193 | // Sorts candidates by their offset in new image. |
| 194 | void SortByDestination(); |
| 195 | // Visits |candidates_| (sorted by |dst_offset|) and remove all destination |
| 196 | // overlaps. Candidates with low similarity scores are more likely to be |
| 197 | // shrunken. Unfit candidates may be removed. |
Etienne Pierre-Doray | 0131956 | 2017-12-30 20:53:33 | [diff] [blame] | 198 | void Prune(const EncodedView& old_view, |
| 199 | const EncodedView& new_view, |
Etienne Pierre-Doray | efe2834 | 2018-01-09 13:47:53 | [diff] [blame] | 200 | const std::vector<TargetsAffinity>& targets_affinities, |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 201 | double min_similarity); |
| 202 | |
| 203 | std::vector<EquivalenceCandidate> candidates_; |
Etienne Pierre-Doray | 16083913 | 2017-08-11 01:22:35 | [diff] [blame] | 204 | }; |
| 205 | |
| 206 | } // namespace zucchini |
| 207 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 208 | #endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ |