peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 1 | #ifndef FORTRAN_PREPROCESSOR_H_ |
| 2 | #define FORTRAN_PREPROCESSOR_H_ |
| 3 | |
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement.  Intended to be efficient
// enough to always run on all source files, even when no preprocessing is
// needed, so that special compiler command options and/or source file name
// extensions for preprocessing will not be necessary.
| 9 | |
#include "idioms.h"
#include <cctype>
#include <cstddef>
#include <cstring>
#include <functional>
#include <list>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
| 20 | |
| 21 | namespace Fortran { |
| 22 | |
| 23 | class CharBuffer; |
| 24 | class Prescanner; |
| 25 | |
// A non-owning (pointer, byte count) view of character data.  It never
// copies or frees the bytes it refers to, so the referenced storage must
// outlive every view of it.  Used to describe buffered tokens and hash
// table keys.
class CharPointerWithLength {
public:
  // An empty view: null pointer, zero bytes.
  CharPointerWithLength() = default;
  // A view of the n bytes beginning at x.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
  // A view of the contents of s; implicit so that a std::string can be
  // passed wherever a view is expected.
  CharPointerWithLength(const std::string &s)
    : data_{s.data()}, bytes_{s.size()} {}
  // Copies are shallow: the copy refers to the same bytes as the source.
  CharPointerWithLength(const CharPointerWithLength &) = default;
  CharPointerWithLength &operator=(const CharPointerWithLength &) = default;

  bool empty() const { return bytes_ == 0; }
  size_t size() const { return bytes_; }
  // Unchecked access to byte j; the caller must ensure j < size().
  const char &operator[](size_t j) const { return *(data_ + j); }

  // Defined out of line.
  bool IsBlank() const;
  // Returns an owning copy of the viewed bytes.
  std::string ToString() const { return std::string{data_, bytes_}; }

private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
| 53 | } // namespace Fortran |
| 54 | |
| 55 | // Specializations to enable std::unordered_map<CharPointerWithLength, ...> |
| 56 | template<> struct std::hash<Fortran::CharPointerWithLength> { |
| 57 | size_t operator()(const Fortran::CharPointerWithLength &x) const { |
peter klausler | be830d6 | 2018-02-05 20:48:09 | [diff] [blame] | 58 | size_t hash{0}, bytes{x.size()}; |
| 59 | for (size_t j{0}; j < bytes; ++j) { |
| 60 | hash = (hash * 31) ^ x[j]; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 61 | } |
| 62 | return hash; |
| 63 | } |
| 64 | }; |
| 65 | |
| 66 | template<> struct std::equal_to<Fortran::CharPointerWithLength> { |
| 67 | bool operator()(const Fortran::CharPointerWithLength &x, |
| 68 | const Fortran::CharPointerWithLength &y) const { |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 69 | return x.size() == y.size() && |
peter klausler | be830d6 | 2018-02-05 20:48:09 | [diff] [blame] | 70 | std::memcmp(static_cast<const void *>(&x[0]), |
| 71 | static_cast<const void *>(&y[0]), |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 72 | x.size()) == 0; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 73 | } |
| 74 | }; |
| 75 | |
| 76 | namespace Fortran { |
| 77 | |
| 78 | // Buffers a contiguous sequence of characters that has been partitioned into |
| 79 | // a sequence of preprocessing tokens. |
| 80 | class TokenSequence { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 81 | public: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 82 | TokenSequence() {} |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 83 | TokenSequence(const TokenSequence &that) { Append(that); } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 84 | TokenSequence(TokenSequence &&that) |
| 85 | : start_{std::move(that.start_)}, nextStart_{that.nextStart_}, |
| 86 | char_{std::move(that.char_)} {} |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 87 | TokenSequence(const std::string &s) { push_back(s); } |
| 88 | |
| 89 | TokenSequence &operator=(const TokenSequence &that) { |
| 90 | clear(); |
| 91 | Append(that); |
| 92 | return *this; |
| 93 | } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 94 | TokenSequence &operator=(TokenSequence &&that) { |
| 95 | start_ = std::move(that.start_); |
| 96 | nextStart_ = that.nextStart_; |
| 97 | char_ = std::move(that.char_); |
| 98 | return *this; |
| 99 | } |
| 100 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 101 | CharPointerWithLength operator[](size_t token) const { |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 102 | return {&char_[start_[token]], |
| 103 | (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) - |
| 104 | start_[token]}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 105 | } |
| 106 | |
| 107 | void AddChar(char ch) { |
| 108 | char_.emplace_back(ch); |
| 109 | } |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 110 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 111 | void EndToken() { |
| 112 | // CHECK(char_.size() > nextStart_); |
| 113 | start_.emplace_back(nextStart_); |
| 114 | nextStart_ = char_.size(); |
| 115 | } |
| 116 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 117 | void ReopenLastToken() { |
| 118 | nextStart_ = start_.back(); |
| 119 | start_.pop_back(); |
| 120 | } |
| 121 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 122 | void Append(const TokenSequence &); |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 123 | void EmitWithCaseConversion(CharBuffer *) const; |
| 124 | std::string ToString() const; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 125 | |
| 126 | bool empty() const { return start_.empty(); } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 127 | size_t size() const { return start_.size(); } |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 128 | const char *data() const { return &char_[0]; } |
peter klausler | be830d6 | 2018-02-05 20:48:09 | [diff] [blame] | 129 | void clear(); |
| 130 | void push_back(const char *, size_t); |
| 131 | void push_back(const CharPointerWithLength &); |
| 132 | void push_back(const std::string &); |
| 133 | void push_back(const std::stringstream &); |
| 134 | void pop_back(); |
| 135 | void shrink_to_fit(); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 136 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 137 | private: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 138 | std::vector<int> start_; |
| 139 | size_t nextStart_{0}; |
| 140 | std::vector<char> char_; |
| 141 | }; |
| 142 | |
| 143 | // Defines a macro |
| 144 | class Definition { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 145 | public: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 146 | Definition(const TokenSequence &, size_t firstToken, size_t tokens); |
| 147 | Definition(const std::vector<std::string> &argNames, const TokenSequence &, |
peter klausler | be830d6 | 2018-02-05 20:48:09 | [diff] [blame] | 148 | size_t firstToken, size_t tokens, bool isVariadic = false); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 149 | explicit Definition(const std::string &predefined); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 150 | |
| 151 | bool isFunctionLike() const { return isFunctionLike_; } |
| 152 | size_t argumentCount() const { return argumentCount_; } |
| 153 | bool isVariadic() const { return isVariadic_; } |
| 154 | bool isDisabled() const { return isDisabled_; } |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 155 | bool isPredefined() const { return isPredefined_; } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 156 | const TokenSequence &replacement() const { return replacement_; } |
| 157 | |
| 158 | bool set_isDisabled(bool disable); |
| 159 | |
| 160 | TokenSequence Apply(const std::vector<TokenSequence> &args); |
| 161 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 162 | private: |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 163 | static TokenSequence Tokenize(const std::vector<std::string> &argNames, |
| 164 | const TokenSequence &token, size_t firstToken, |
| 165 | size_t tokens); |
| 166 | |
| 167 | bool isFunctionLike_{false}; |
| 168 | size_t argumentCount_{0}; |
| 169 | bool isVariadic_{false}; |
| 170 | bool isDisabled_{false}; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 171 | bool isPredefined_{false}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 172 | TokenSequence replacement_; |
| 173 | }; |
| 174 | |
| 175 | // Preprocessing state |
| 176 | class Preprocessor { |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 177 | public: |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 178 | explicit Preprocessor(Prescanner &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 179 | |
| 180 | // When the input contains macros to be replaced, the new token sequence |
| 181 | // is appended to the output and the returned value is true. When |
| 182 | // no macro replacement is necessary, the output is unmodified and the |
| 183 | // return value is false. |
| 184 | bool MacroReplacement(const TokenSequence &, TokenSequence *); |
| 185 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 186 | // Implements a preprocessor directive; returns true when no fatal error. |
| 187 | bool Directive(const TokenSequence &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 188 | |
peter klausler | bdea10d | 2018-02-05 20:54:36 | [diff] [blame^] | 189 | private: |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 190 | enum class IsElseActive { No, Yes }; |
| 191 | enum class CanDeadElseAppear { No, Yes }; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 192 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 193 | void Complain(const std::string &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 194 | CharPointerWithLength SaveToken(const CharPointerWithLength &); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 195 | bool IsNameDefined(const CharPointerWithLength &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 196 | TokenSequence ReplaceMacros(const TokenSequence &); |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 197 | bool SkipDisabledConditionalCode(const std::string &dirName, IsElseActive); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 198 | bool IsIfPredicateTrue(const TokenSequence &expr, size_t first, |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame] | 199 | size_t exprTokens); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 200 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 201 | Prescanner &prescanner_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 202 | std::list<std::string> names_; |
| 203 | std::unordered_map<CharPointerWithLength, Definition> definitions_; |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 204 | std::stack<CanDeadElseAppear> ifStack_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 205 | }; |
| 206 | } // namespace Fortran |
| 207 | #endif // FORTRAN_PREPROCESSOR_H_ |