peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 1 | #ifndef FORTRAN_PREPROCESSOR_H_ |
| 2 | #define FORTRAN_PREPROCESSOR_H_ |
| 3 | |
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement.  Intended to be efficient
// enough to always run on all source files even when no preprocessing is
// needed, so that special compiler command options and/or source file name
// extensions for preprocessing will not be necessary.
| 9 | |
| 10 | #include "idioms.h" |
| 11 | #include <cctype> |
| 12 | #include <cstring> |
| 13 | #include <functional> |
| 14 | #include <list> |
| 15 | #include <stack> |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 16 | #include <sstream> |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 17 | #include <string> |
| 18 | #include <unordered_map> |
| 19 | #include <vector> |
| 20 | |
| 21 | namespace Fortran { |
| 22 | |
| 23 | class CharBuffer; |
| 24 | class Prescanner; |
| 25 | |
// A non-owning view of a run of characters: a const char pointer paired with
// a byte count.  Used to describe buffered tokens and hash table keys.
// The view never copies or frees the characters it refers to, so they must
// outlive every view that points at them.
class CharPointerWithLength {
public:
  // Constructs an empty view (null data pointer, zero length).
  CharPointerWithLength() = default;

  // Views the n bytes that begin at x.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}

  // Views the contents of s.  Intentionally not explicit: callers pass a
  // std::string wherever a view is expected (e.g. TokenSequence::push_back).
  CharPointerWithLength(const std::string &s)
    : data_{s.data()}, bytes_{s.size()} {}

  // Copying duplicates only the pointer and the length, which is exactly
  // what the compiler-generated operations do; defaulting them also keeps
  // the type trivially copyable.
  CharPointerWithLength(const CharPointerWithLength &) = default;
  CharPointerWithLength &operator=(const CharPointerWithLength &) = default;

  bool empty() const { return bytes_ == 0; }
  size_t size() const { return bytes_; }
  const char *data() const { return data_; }

  // Unchecked element access; j must be less than size().
  const char &operator[](size_t j) const { return data_[j]; }

  // Defined out of line.
  bool IsBlank() const;

  // Returns an owning copy of the viewed bytes.  An empty view yields an
  // empty string without ever handing a null pointer to std::string.
  std::string ToString() const {
    return empty() ? std::string{} : std::string{data_, bytes_};
  }

private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
| 54 | } // namespace Fortran |
| 55 | |
| 56 | // Specializations to enable std::unordered_map<CharPointerWithLength, ...> |
| 57 | template<> struct std::hash<Fortran::CharPointerWithLength> { |
| 58 | size_t operator()(const Fortran::CharPointerWithLength &x) const { |
| 59 | size_t hash{0}; |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 60 | const char *p{x.data()}, *limit{p + x.size()}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 61 | for (; p < limit; ++p) { |
| 62 | hash = (hash * 31) ^ *p; |
| 63 | } |
| 64 | return hash; |
| 65 | } |
| 66 | }; |
| 67 | |
| 68 | template<> struct std::equal_to<Fortran::CharPointerWithLength> { |
| 69 | bool operator()(const Fortran::CharPointerWithLength &x, |
| 70 | const Fortran::CharPointerWithLength &y) const { |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 71 | return x.size() == y.size() && |
| 72 | std::memcmp(static_cast<const void *>(x.data()), |
| 73 | static_cast<const void *>(y.data()), |
| 74 | x.size()) == 0; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 75 | } |
| 76 | }; |
| 77 | |
| 78 | namespace Fortran { |
| 79 | |
| 80 | // Buffers a contiguous sequence of characters that has been partitioned into |
| 81 | // a sequence of preprocessing tokens. |
| 82 | class TokenSequence { |
| 83 | public: |
| 84 | TokenSequence() {} |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 85 | TokenSequence(const TokenSequence &that) { Append(that); } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 86 | TokenSequence(TokenSequence &&that) |
| 87 | : start_{std::move(that.start_)}, nextStart_{that.nextStart_}, |
| 88 | char_{std::move(that.char_)} {} |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 89 | TokenSequence(const std::string &s) { push_back(s); } |
| 90 | |
| 91 | TokenSequence &operator=(const TokenSequence &that) { |
| 92 | clear(); |
| 93 | Append(that); |
| 94 | return *this; |
| 95 | } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 96 | TokenSequence &operator=(TokenSequence &&that) { |
| 97 | start_ = std::move(that.start_); |
| 98 | nextStart_ = that.nextStart_; |
| 99 | char_ = std::move(that.char_); |
| 100 | return *this; |
| 101 | } |
| 102 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 103 | CharPointerWithLength operator[](size_t token) const { |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 104 | return {&char_[start_[token]], |
| 105 | (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) - |
| 106 | start_[token]}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 107 | } |
| 108 | |
| 109 | void AddChar(char ch) { |
| 110 | char_.emplace_back(ch); |
| 111 | } |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 112 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 113 | void EndToken() { |
| 114 | // CHECK(char_.size() > nextStart_); |
| 115 | start_.emplace_back(nextStart_); |
| 116 | nextStart_ = char_.size(); |
| 117 | } |
| 118 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 119 | void ReopenLastToken() { |
| 120 | nextStart_ = start_.back(); |
| 121 | start_.pop_back(); |
| 122 | } |
| 123 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 124 | void Append(const TokenSequence &); |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 125 | void EmitWithCaseConversion(CharBuffer *) const; |
| 126 | std::string ToString() const; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 127 | |
| 128 | bool empty() const { return start_.empty(); } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 129 | size_t size() const { return start_.size(); } |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 130 | const char *data() const { return &char_[0]; } |
| 131 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 132 | void clear() { |
| 133 | start_.clear(); |
| 134 | nextStart_ = 0; |
| 135 | char_.clear(); |
| 136 | } |
| 137 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 138 | void push_back(const char *s, size_t bytes) { |
| 139 | for (size_t j{0}; j < bytes; ++j) { |
| 140 | AddChar(s[j]); |
| 141 | } |
| 142 | EndToken(); |
| 143 | } |
| 144 | |
| 145 | void push_back(const CharPointerWithLength &t) { |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 146 | push_back(t.data(), t.size()); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 147 | } |
| 148 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 149 | void push_back(const std::stringstream &ss) { push_back(ss.str()); } |
| 150 | |
| 151 | void pop_back() { |
| 152 | nextStart_ = start_.back(); |
| 153 | start_.pop_back(); |
| 154 | char_.resize(nextStart_); |
| 155 | } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 156 | |
| 157 | void shrink_to_fit() { |
| 158 | start_.shrink_to_fit(); |
| 159 | char_.shrink_to_fit(); |
| 160 | } |
| 161 | |
| 162 | private: |
| 163 | std::vector<int> start_; |
| 164 | size_t nextStart_{0}; |
| 165 | std::vector<char> char_; |
| 166 | }; |
| 167 | |
| 168 | // Defines a macro |
| 169 | class Definition { |
| 170 | public: |
| 171 | Definition(const TokenSequence &, size_t firstToken, size_t tokens); |
| 172 | Definition(const std::vector<std::string> &argNames, const TokenSequence &, |
| 173 | size_t firstToken, size_t tokens); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 174 | explicit Definition(const std::string &predefined); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 175 | |
| 176 | bool isFunctionLike() const { return isFunctionLike_; } |
| 177 | size_t argumentCount() const { return argumentCount_; } |
| 178 | bool isVariadic() const { return isVariadic_; } |
| 179 | bool isDisabled() const { return isDisabled_; } |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 180 | bool isPredefined() const { return isPredefined_; } |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 181 | const TokenSequence &replacement() const { return replacement_; } |
| 182 | |
| 183 | bool set_isDisabled(bool disable); |
| 184 | |
| 185 | TokenSequence Apply(const std::vector<TokenSequence> &args); |
| 186 | |
| 187 | private: |
| 188 | static TokenSequence Tokenize(const std::vector<std::string> &argNames, |
| 189 | const TokenSequence &token, size_t firstToken, |
| 190 | size_t tokens); |
| 191 | |
| 192 | bool isFunctionLike_{false}; |
| 193 | size_t argumentCount_{0}; |
| 194 | bool isVariadic_{false}; |
| 195 | bool isDisabled_{false}; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 196 | bool isPredefined_{false}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 197 | TokenSequence replacement_; |
| 198 | }; |
| 199 | |
| 200 | // Preprocessing state |
| 201 | class Preprocessor { |
| 202 | public: |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 203 | explicit Preprocessor(Prescanner &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 204 | |
| 205 | // When the input contains macros to be replaced, the new token sequence |
| 206 | // is appended to the output and the returned value is true. When |
| 207 | // no macro replacement is necessary, the output is unmodified and the |
| 208 | // return value is false. |
| 209 | bool MacroReplacement(const TokenSequence &, TokenSequence *); |
| 210 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 211 | // Implements a preprocessor directive; returns true when no fatal error. |
| 212 | bool Directive(const TokenSequence &); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 213 | |
| 214 | private: |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 215 | enum class IsElseActive { No, Yes }; |
| 216 | enum class CanDeadElseAppear { No, Yes }; |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 217 | |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 218 | void Complain(const std::string &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 219 | CharPointerWithLength SaveToken(const CharPointerWithLength &); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 220 | bool IsNameDefined(const CharPointerWithLength &); |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 221 | TokenSequence ReplaceMacros(const TokenSequence &); |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 222 | bool SkipDisabledConditionalCode(const std::string &dirName, IsElseActive); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 223 | bool IsIfPredicateTrue(const TokenSequence &expr, size_t first, |
peter klausler | 55fe4d2 | 2018-02-02 23:52:43 | [diff] [blame^] | 224 | size_t exprTokens); |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 225 | |
peter klausler | ad7125f | 2018-02-01 23:01:23 | [diff] [blame] | 226 | Prescanner &prescanner_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 227 | std::list<std::string> names_; |
| 228 | std::unordered_map<CharPointerWithLength, Definition> definitions_; |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 229 | std::stack<CanDeadElseAppear> ifStack_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 230 | }; |
| 231 | } // namespace Fortran |
| 232 | #endif // FORTRAN_PREPROCESSOR_H_ |