etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 1 | // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 5 | #ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |
| 6 | #define COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 7 | |
Samuel Huang | e8d07b75 | 2017-08-21 16:05:25 | [diff] [blame] | 8 | #include <stddef.h> |
| 9 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 10 | #include <memory> |
| 11 | #include <string> |
| 12 | #include <vector> |
| 13 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 14 | #include "components/zucchini/buffer_view.h" |
| 15 | #include "components/zucchini/image_utils.h" |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 16 | |
| 17 | namespace zucchini { |
| 18 | |
Etienne Pierre-doray | 3c64e078 | 2018-08-10 17:44:37 | [diff] [blame] | 19 | // A vacuous ReferenceReader that produces no references. |
| 20 | class EmptyReferenceReader : public ReferenceReader { |
| 21 | public: |
Anton Bikineev | 1156b5f | 2021-05-15 22:35:36 | [diff] [blame] | 22 | absl::optional<Reference> GetNext() override; |
Etienne Pierre-doray | 3c64e078 | 2018-08-10 17:44:37 | [diff] [blame] | 23 | }; |
| 24 | |
Samuel Huang | d46be686 | 2018-12-31 19:59:22 | [diff] [blame] | 25 | // A vacuous EmptyReferenceWriter that does not write. |
| 26 | class EmptyReferenceWriter : public ReferenceWriter { |
| 27 | public: |
| 28 | void PutNext(Reference reference) override; |
| 29 | }; |
| 30 | |
// Disassembler needs to be defined before ReferenceGroup because the latter
// contains member pointers based on the former, and we use a compiler flag,
// -fcomplete-member-pointers, which enforces that member pointer base types are
// complete. This flag helps prevent us from running into problems in the
// Microsoft C++ ABI (see https://crbug.com/847724).
| 36 | |
| 37 | class ReferenceGroup; |
| 38 | |
| 39 | // A Disassembler is used to encapsulate architecture specific operations, to: |
| 40 | // - Describe types of references found in the architecture using traits. |
| 41 | // - Extract references contained in an image file. |
| 42 | // - Correct target for some references. |
| 43 | class Disassembler { |
| 44 | public: |
| 45 | // Attempts to parse |image| and create an architecture-specifc Disassembler, |
| 46 | // as determined by DIS, which is inherited from Disassembler. Returns an |
| 47 | // instance of DIS if successful, and null otherwise. |
| 48 | template <class DIS> |
| 49 | static std::unique_ptr<DIS> Make(ConstBufferView image) { |
| 50 | auto disasm = std::make_unique<DIS>(); |
| 51 | if (!disasm->Parse(image)) |
| 52 | return nullptr; |
| 53 | return disasm; |
| 54 | } |
| 55 | |
Samuel Huang | ba0e1f5 | 2021-08-13 15:42:26 | [diff] [blame] | 56 | Disassembler(const Disassembler&) = delete; |
| 57 | const Disassembler& operator=(const Disassembler&) = delete; |
Peter Collingbourne | fea64fd | 2018-06-13 22:17:49 | [diff] [blame] | 58 | virtual ~Disassembler(); |
| 59 | |
| 60 | // Returns the type of executable handled by the Disassembler. |
| 61 | virtual ExecutableType GetExeType() const = 0; |
| 62 | |
| 63 | // Returns a more detailed description of the executable type. |
| 64 | virtual std::string GetExeTypeString() const = 0; |
| 65 | |
| 66 | // Creates and returns a vector that contains all groups of references. |
| 67 | // Groups must be aggregated by pool. |
| 68 | virtual std::vector<ReferenceGroup> MakeReferenceGroups() const = 0; |
| 69 | |
| 70 | ConstBufferView image() const { return image_; } |
| 71 | size_t size() const { return image_.size(); } |
| 72 | |
| 73 | int num_equivalence_iterations() const { return num_equivalence_iterations_; } |
| 74 | |
| 75 | protected: |
| 76 | explicit Disassembler(int num_equivalence_iterations); |
| 77 | |
| 78 | // Parses |image| and initializes internal states. Returns true on success. |
| 79 | // This must be called once and before any other operation. |
| 80 | virtual bool Parse(ConstBufferView image) = 0; |
| 81 | |
| 82 | // Raw image data. After Parse(), a Disassembler should shrink this to contain |
| 83 | // only the portion containing the executable file it recognizes. |
| 84 | ConstBufferView image_; |
| 85 | |
| 86 | // The number of iterations to run for equivalence map generation. This should |
| 87 | // roughly be the max length of reference indirection chains. |
| 88 | int num_equivalence_iterations_; |
Peter Collingbourne | fea64fd | 2018-06-13 22:17:49 | [diff] [blame] | 89 | }; |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 90 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 91 | // A ReferenceGroup is associated with a specific |type| and has convenience |
| 92 | // methods to obtain readers and writers for that type. A ReferenceGroup does |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 93 | // not store references; it is a lightweight class that communicates with the |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 94 | // disassembler to operate on them. |
| 95 | class ReferenceGroup { |
| 96 | public: |
| 97 | // Member function pointer used to obtain a ReferenceReader. |
| 98 | using ReaderFactory = std::unique_ptr<ReferenceReader> ( |
| 99 | Disassembler::*)(offset_t lower, offset_t upper); |
| 100 | |
| 101 | // Member function pointer used to obtain a ReferenceWriter. |
| 102 | using WriterFactory = std::unique_ptr<ReferenceWriter> (Disassembler::*)( |
| 103 | MutableBufferView image); |
| 104 | |
Etienne Pierre-doray | c214323 | 2022-03-26 01:10:19 | [diff] [blame] | 105 | // Member function pointer used to obtain a ReferenceMixer. |
| 106 | using MixerFactory = std::unique_ptr<ReferenceMixer> ( |
| 107 | Disassembler::*)(ConstBufferView old_image, ConstBufferView new_image); |
| 108 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 109 | // RefinedGeneratorFactory and RefinedReceptorFactory don't have to be |
| 110 | // identical to GeneratorFactory and ReceptorFactory, but they must be |
| 111 | // convertible. As a result, they can be pointer to member function of a |
| 112 | // derived Disassembler. |
| 113 | template <class RefinedReaderFactory, class RefinedWriterFactory> |
| 114 | ReferenceGroup(ReferenceTypeTraits traits, |
| 115 | RefinedReaderFactory reader_factory, |
| 116 | RefinedWriterFactory writer_factory) |
| 117 | : traits_(traits), |
| 118 | reader_factory_(static_cast<ReaderFactory>(reader_factory)), |
| 119 | writer_factory_(static_cast<WriterFactory>(writer_factory)) {} |
| 120 | |
Etienne Pierre-doray | c214323 | 2022-03-26 01:10:19 | [diff] [blame] | 121 | template <class RefinedReaderFactory, |
| 122 | class RefinedWriterFactory, |
| 123 | class RefinedMixerFactory> |
| 124 | ReferenceGroup(ReferenceTypeTraits traits, |
| 125 | RefinedReaderFactory reader_factory, |
| 126 | RefinedWriterFactory writer_factory, |
| 127 | RefinedMixerFactory mixer_factory) |
| 128 | : traits_(traits), |
| 129 | reader_factory_(static_cast<ReaderFactory>(reader_factory)), |
| 130 | writer_factory_(static_cast<WriterFactory>(writer_factory)), |
| 131 | mixer_factory_(static_cast<MixerFactory>(mixer_factory)) {} |
| 132 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 133 | // Returns a reader for all references in the binary. |
| 134 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 135 | std::unique_ptr<ReferenceReader> GetReader(Disassembler* disasm) const; |
| 136 | |
| 137 | // Returns a reader for references whose bytes are entirely contained in |
| 138 | // |[lower, upper)|. |
| 139 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 140 | std::unique_ptr<ReferenceReader> GetReader(offset_t lower, |
| 141 | offset_t upper, |
| 142 | Disassembler* disasm) const; |
| 143 | |
| 144 | // Returns a writer for references in |image|, assuming that |image| was the |
| 145 | // same one initially parsed by |disasm|. |
| 146 | // Invalidates any other writer or reader previously obtained for |disasm|. |
| 147 | std::unique_ptr<ReferenceWriter> GetWriter(MutableBufferView image, |
| 148 | Disassembler* disasm) const; |
| 149 | |
Etienne Pierre-doray | c214323 | 2022-03-26 01:10:19 | [diff] [blame] | 150 | // Returns mixer for references between |old_image| and |new_image|, assuming |
| 151 | // they both contain the same type of executable as |disasm|. |
| 152 | std::unique_ptr<ReferenceMixer> GetMixer(ConstBufferView old_image, |
| 153 | ConstBufferView new_image, |
| 154 | Disassembler* disasm) const; |
| 155 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 156 | // Returns traits describing the reference type. |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 157 | const ReferenceTypeTraits& traits() const { return traits_; } |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 158 | |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 159 | // Shorthand for traits().width. |
| 160 | offset_t width() const { return traits().width; } |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 161 | |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 162 | // Shorthand for traits().type_tag. |
| 163 | TypeTag type_tag() const { return traits().type_tag; } |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 164 | |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 165 | // Shorthand for traits().pool_tag. |
| 166 | PoolTag pool_tag() const { return traits().pool_tag; } |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 167 | |
Etienne Pierre-doray | 71ca465c4 | 2017-07-06 00:57:18 | [diff] [blame] | 168 | private: |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 169 | ReferenceTypeTraits traits_; |
| 170 | ReaderFactory reader_factory_ = nullptr; |
| 171 | WriterFactory writer_factory_ = nullptr; |
Etienne Pierre-doray | c214323 | 2022-03-26 01:10:19 | [diff] [blame] | 172 | MixerFactory mixer_factory_ = nullptr; |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 173 | }; |
| 174 | |
etiennep | 9c70aaafd | 2017-06-30 17:43:13 | [diff] [blame] | 175 | } // namespace zucchini |
| 176 | |
Samuel Huang | 577ef6c | 2018-03-13 18:19:34 | [diff] [blame] | 177 | #endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_H_ |