peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 1 | #ifndef FORTRAN_PREPROCESSOR_H_ |
| 2 | #define FORTRAN_PREPROCESSOR_H_ |
| 3 | |
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement.  Intended to be efficient
// enough to always run on all source files, even when no preprocessing is
// needed, so that special compiler command-line options and/or source file
// name extensions for preprocessing will not be necessary.
| 9 | |
| 10 | #include "idioms.h" |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 11 | #include "provenance.h" |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 12 | #include <cctype> |
| 13 | #include <cstring> |
| 14 | #include <functional> |
| 15 | #include <list> |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 16 | #include <sstream> |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 17 | #include <stack> |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 18 | #include <string> |
| 19 | #include <unordered_map> |
| 20 | #include <vector> |
| 21 | |
| 22 | namespace Fortran { |
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 23 | namespace parser { |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 24 | |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 25 | class CookedSource; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 26 | class Prescanner; |
| 27 | |
// A non-owning view of a run of characters: a const char pointer paired
// with a byte count.  The view never owns or frees the referenced data, so
// that data must outlive every view that refers to it.  Used to describe
// buffered tokens and hash table keys.
class CharPointerWithLength {
public:
  // An empty view: null data pointer, zero length.
  CharPointerWithLength() {}
  // A view of the n bytes starting at x.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
  // A view of the current contents of s.  Left non-explicit so that existing
  // implicit conversions from std::string keep compiling.
  CharPointerWithLength(const std::string &s)
    : data_{s.data()}, bytes_{s.size()} {}
  // Copying is a plain memberwise copy of the pointer and the length, which
  // is exactly what the compiler generates; defaulting the operations keeps
  // the type trivially copyable.
  CharPointerWithLength(const CharPointerWithLength &) = default;
  CharPointerWithLength &operator=(const CharPointerWithLength &) = default;

  bool empty() const { return bytes_ == 0; }
  size_t size() const { return bytes_; }
  // Unchecked access to byte j; the caller must ensure j < size().
  const char &operator[](size_t j) const { return data_[j]; }

  bool IsBlank() const;
  // Returns an owning copy of the referenced bytes.  An empty view yields an
  // empty string without handing its (possibly null) data pointer to the
  // std::string constructor.
  std::string ToString() const {
    return bytes_ == 0 ? std::string{} : std::string{data_, bytes_};
  }

private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 55 | } // namespace parser |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 56 | } // namespace Fortran |
| 57 | |
| 58 | // Specializations to enable std::unordered_map<CharPointerWithLength, ...> |
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 59 | template<> struct std::hash<Fortran::parser::CharPointerWithLength> { |
| 60 | size_t operator()(const Fortran::parser::CharPointerWithLength &x) const { |
peter klausler | be830d6 | 2018-02-05 20:48:09 | [diff] [blame] | 61 | size_t hash{0}, bytes{x.size()}; |
| 62 | for (size_t j{0}; j < bytes; ++j) { |
| 63 | hash = (hash * 31) ^ x[j]; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 64 | } |
| 65 | return hash; |
| 66 | } |
| 67 | }; |
| 68 | |
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 69 | template<> struct std::equal_to<Fortran::parser::CharPointerWithLength> { |
| 70 | bool operator()(const Fortran::parser::CharPointerWithLength &x, |
| 71 | const Fortran::parser::CharPointerWithLength &y) const { |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 72 | return x.size() == y.size() && |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 73 | std::memcmp(static_cast<const void *>(&x[0]), |
| 74 | static_cast<const void *>(&y[0]), x.size()) == 0; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 75 | } |
| 76 | }; |
| 77 | |
| 78 | namespace Fortran { |
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 79 | namespace parser { |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 80 | |
| 81 | // Buffers a contiguous sequence of characters that has been partitioned into |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 82 | // a sequence of preprocessing tokens with provenances. |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 83 | class TokenSequence { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 84 | public: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 85 | TokenSequence() {} |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 86 | TokenSequence(const TokenSequence &that) { Put(that); } |
| 87 | TokenSequence(const TokenSequence &that, size_t at, size_t count = 1) { |
| 88 | Put(that, at, count); |
| 89 | } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 90 | TokenSequence(TokenSequence &&that) |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 91 | : start_{std::move(that.start_)}, |
| 92 | nextStart_{that.nextStart_}, char_{std::move(that.char_)} {} |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 93 | TokenSequence(const std::string &s) { Put(s, 0); } // TODO predefined prov. |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 94 | |
| 95 | TokenSequence &operator=(const TokenSequence &that) { |
| 96 | clear(); |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 97 | Put(that); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 98 | return *this; |
| 99 | } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 100 | TokenSequence &operator=(TokenSequence &&that) { |
| 101 | start_ = std::move(that.start_); |
| 102 | nextStart_ = that.nextStart_; |
| 103 | char_ = std::move(that.char_); |
| 104 | return *this; |
| 105 | } |
| 106 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 107 | CharPointerWithLength operator[](size_t token) const { |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 108 | return {&char_[start_[token]], TokenBytes(token)}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 109 | } |
| 110 | |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 111 | bool empty() const { return start_.empty(); } |
| 112 | size_t size() const { return start_.size(); } |
| 113 | const char *data() const { return &char_[0]; } |
| 114 | void clear(); |
| 115 | void pop_back(); |
| 116 | void shrink_to_fit(); |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 117 | |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 118 | void PutNextTokenChar(char ch, Provenance provenance) { |
| 119 | char_.emplace_back(ch); |
| 120 | provenances_.Put({provenance, 1}); |
| 121 | } |
| 122 | |
| 123 | void CloseToken() { |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 124 | // CHECK(char_.size() > nextStart_); |
| 125 | start_.emplace_back(nextStart_); |
| 126 | nextStart_ = char_.size(); |
| 127 | } |
| 128 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 129 | void ReopenLastToken() { |
| 130 | nextStart_ = start_.back(); |
| 131 | start_.pop_back(); |
| 132 | } |
| 133 | |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 134 | void Put(const TokenSequence &); |
| 135 | void Put(const TokenSequence &, size_t at, size_t tokens = 1); |
| 136 | void Put(const char *, size_t, Provenance); |
| 137 | void Put(const CharPointerWithLength &, Provenance); |
| 138 | void Put(const std::string &, Provenance); |
| 139 | void Put(const std::stringstream &, Provenance); |
| 140 | void EmitWithCaseConversion(CookedSource *) const; |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 141 | std::string ToString() const; |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 142 | ProvenanceRange GetProvenance(size_t token, size_t offset = 0) const; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 143 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 144 | private: |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 145 | size_t TokenBytes(size_t token) const { |
| 146 | return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) - |
| 147 | start_[token]; |
| 148 | } |
| 149 | |
| 150 | std::vector<size_t> start_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 151 | size_t nextStart_{0}; |
| 152 | std::vector<char> char_; |
peter klausler | 09865ff | 2018-02-09 22:04:11 | [diff] [blame^] | 153 | OffsetToProvenanceMappings provenances_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 154 | }; |
| 155 | |
| 156 | // Defines a macro |
| 157 | class Definition { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 158 | public: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 159 | Definition(const TokenSequence &, size_t firstToken, size_t tokens); |
| 160 | Definition(const std::vector<std::string> &argNames, const TokenSequence &, |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 161 | size_t firstToken, size_t tokens, bool isVariadic = false); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 162 | explicit Definition(const std::string &predefined); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 163 | |
| 164 | bool isFunctionLike() const { return isFunctionLike_; } |
| 165 | size_t argumentCount() const { return argumentCount_; } |
| 166 | bool isVariadic() const { return isVariadic_; } |
| 167 | bool isDisabled() const { return isDisabled_; } |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 168 | bool isPredefined() const { return isPredefined_; } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 169 | const TokenSequence &replacement() const { return replacement_; } |
| 170 | |
| 171 | bool set_isDisabled(bool disable); |
| 172 | |
| 173 | TokenSequence Apply(const std::vector<TokenSequence> &args); |
| 174 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 175 | private: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 176 | static TokenSequence Tokenize(const std::vector<std::string> &argNames, |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 177 | const TokenSequence &token, size_t firstToken, size_t tokens); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 178 | |
| 179 | bool isFunctionLike_{false}; |
| 180 | size_t argumentCount_{0}; |
| 181 | bool isVariadic_{false}; |
| 182 | bool isDisabled_{false}; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 183 | bool isPredefined_{false}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 184 | TokenSequence replacement_; |
| 185 | }; |
| 186 | |
| 187 | // Preprocessing state |
| 188 | class Preprocessor { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 189 | public: |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 190 | explicit Preprocessor(Prescanner &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 191 | |
| 192 | // When the input contains macros to be replaced, the new token sequence |
| 193 | // is appended to the output and the returned value is true. When |
| 194 | // no macro replacement is necessary, the output is unmodified and the |
| 195 | // return value is false. |
| 196 | bool MacroReplacement(const TokenSequence &, TokenSequence *); |
| 197 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 198 | // Implements a preprocessor directive; returns true when no fatal error. |
| 199 | bool Directive(const TokenSequence &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 200 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame] | 201 | private: |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 202 | enum class IsElseActive { No, Yes }; |
| 203 | enum class CanDeadElseAppear { No, Yes }; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 204 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 205 | void Complain(const std::string &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 206 | CharPointerWithLength SaveToken(const CharPointerWithLength &); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 207 | bool IsNameDefined(const CharPointerWithLength &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 208 | TokenSequence ReplaceMacros(const TokenSequence &); |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 209 | bool SkipDisabledConditionalCode(const std::string &dirName, IsElseActive); |
peter klausler | 16c6f53 | 2018-02-05 22:29:26 | [diff] [blame] | 210 | bool IsIfPredicateTrue( |
| 211 | const TokenSequence &expr, size_t first, size_t exprTokens); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 212 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 213 | Prescanner &prescanner_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 214 | std::list<std::string> names_; |
| 215 | std::unordered_map<CharPointerWithLength, Definition> definitions_; |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 216 | std::stack<CanDeadElseAppear> ifStack_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 217 | }; |
peter klausler | 0ba1a14 | 2018-02-07 20:04:42 | [diff] [blame] | 218 | } // namespace parser |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 219 | } // namespace Fortran |
| 220 | #endif // FORTRAN_PREPROCESSOR_H_ |