peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 1 | #ifndef FORTRAN_PREPROCESSOR_H_ |
| 2 | #define FORTRAN_PREPROCESSOR_H_ |
| 3 | |
| 4 | // A Fortran-aware preprocessing module used by the prescanner to implement |
| 5 | // preprocessing directives and macro replacement. Intended to be efficient |
| 6 | // enough to always run on all source files even when no preprocessing is |
| 7 | // needed, so that special compiler command options &/or source file name |
| 8 | // extensions for preprocessing will not be necessary. |
| 9 | |
| 10 | #include "idioms.h" |
| 11 | #include <cctype> |
| 12 | #include <cstring> |
| 13 | #include <functional> |
| 14 | #include <list> |
| 15 | #include <stack> |
| 16 | #include <string> |
| 17 | #include <unordered_map> |
| 18 | #include <vector> |
| 19 | |
| 20 | namespace Fortran { |
| 21 | |
| 22 | class CharBuffer; |
| 23 | class Prescanner; |
| 24 | |
// A non-owning view of a run of characters: a const char pointer paired with
// a byte count.  The referenced storage must outlive the view.  Used to
// describe buffered tokens and as the key type of hash tables.
//
// No copy or move operations are declared: the implicitly generated ones
// perform exactly the member-wise copy that is wanted and keep the type
// trivially copyable.
class CharPointerWithLength {
public:
  // Constructs an empty view (null pointer, zero bytes).
  CharPointerWithLength() = default;
  // Constructs a view of the n bytes beginning at x.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}

  // True when the view covers no bytes.
  bool empty() const { return bytes_ == 0; }
  // Number of bytes covered by the view.
  size_t size() const { return bytes_; }
  // Pointer to the first byte; null for a default-constructed view.
  const char *data() const { return data_; }
  // Unchecked access to byte j; the caller must ensure j < size().
  const char &operator[](size_t j) const { return data_[j]; }

private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
| 48 | } // namespace Fortran |
| 49 | |
| 50 | // Specializations to enable std::unordered_map<CharPointerWithLength, ...> |
| 51 | template<> struct std::hash<Fortran::CharPointerWithLength> { |
| 52 | size_t operator()(const Fortran::CharPointerWithLength &x) const { |
| 53 | size_t hash{0}; |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 54 | const char *p{x.data()}, *limit{p + x.size()}; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 55 | for (; p < limit; ++p) { |
| 56 | hash = (hash * 31) ^ *p; |
| 57 | } |
| 58 | return hash; |
| 59 | } |
| 60 | }; |
| 61 | |
| 62 | template<> struct std::equal_to<Fortran::CharPointerWithLength> { |
| 63 | bool operator()(const Fortran::CharPointerWithLength &x, |
| 64 | const Fortran::CharPointerWithLength &y) const { |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 65 | return x.size() == y.size() && |
| 66 | std::memcmp(static_cast<const void *>(x.data()), |
| 67 | static_cast<const void *>(y.data()), |
| 68 | x.size()) == 0; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 69 | } |
| 70 | }; |
| 71 | |
| 72 | namespace Fortran { |
| 73 | |
| 74 | // Buffers a contiguous sequence of characters that has been partitioned into |
| 75 | // a sequence of preprocessing tokens. |
| 76 | class TokenSequence { |
| 77 | public: |
| 78 | TokenSequence() {} |
| 79 | TokenSequence(TokenSequence &&that) |
| 80 | : start_{std::move(that.start_)}, nextStart_{that.nextStart_}, |
| 81 | char_{std::move(that.char_)} {} |
| 82 | TokenSequence &operator=(TokenSequence &&that) { |
| 83 | start_ = std::move(that.start_); |
| 84 | nextStart_ = that.nextStart_; |
| 85 | char_ = std::move(that.char_); |
| 86 | return *this; |
| 87 | } |
| 88 | |
| 89 | size_t GetBytes(size_t token) const { |
| 90 | return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) - |
| 91 | start_[token]; |
| 92 | } |
| 93 | const char *GetText(size_t token) const { |
| 94 | return &char_[start_[token]]; |
| 95 | } |
| 96 | std::string GetString(size_t token) const { |
| 97 | return std::string(GetText(token), GetBytes(token)); |
| 98 | } |
| 99 | CharPointerWithLength GetToken(size_t token) const { |
| 100 | return {GetText(token), GetBytes(token)}; |
| 101 | } |
| 102 | |
| 103 | void AddChar(char ch) { |
| 104 | char_.emplace_back(ch); |
| 105 | } |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 106 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 107 | void EndToken() { |
| 108 | // CHECK(char_.size() > nextStart_); |
| 109 | start_.emplace_back(nextStart_); |
| 110 | nextStart_ = char_.size(); |
| 111 | } |
| 112 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 113 | void ReopenLastToken() { |
| 114 | nextStart_ = start_.back(); |
| 115 | start_.pop_back(); |
| 116 | } |
| 117 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 118 | void Append(const TokenSequence &); |
| 119 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 120 | void EmitWithCaseConversion(CharBuffer *); |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 121 | |
| 122 | bool empty() const { return start_.empty(); } |
| 123 | |
| 124 | size_t size() const { return start_.size(); } |
| 125 | |
peter klausler | 6f94e84 | 2018-01-30 23:22:26 | [diff] [blame] | 126 | const char *data() const { return &char_[0]; } |
| 127 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 128 | void clear() { |
| 129 | start_.clear(); |
| 130 | nextStart_ = 0; |
| 131 | char_.clear(); |
| 132 | } |
| 133 | |
| 134 | void pop_back() { |
| 135 | nextStart_ = start_.back(); |
| 136 | start_.pop_back(); |
| 137 | char_.resize(nextStart_); |
| 138 | } |
| 139 | |
| 140 | void push_back(const char *s, size_t bytes) { |
| 141 | for (size_t j{0}; j < bytes; ++j) { |
| 142 | AddChar(s[j]); |
| 143 | } |
| 144 | EndToken(); |
| 145 | } |
| 146 | |
| 147 | void push_back(const CharPointerWithLength &t) { |
peter klausler | 33d7854 | 2018-01-30 20:21:25 | [diff] [blame] | 148 | size_t bytes{t.size()}; |
| 149 | for (size_t j{0}; j < bytes; ++j) { |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 150 | AddChar(t[j]); |
| 151 | } |
| 152 | EndToken(); |
| 153 | } |
| 154 | |
| 155 | void push_back(const std::string &s) { |
| 156 | size_t bytes{s.size()}; |
| 157 | for (size_t j{0}; j < bytes; ++j) { |
| 158 | AddChar(s[j]); |
| 159 | } |
| 160 | EndToken(); |
| 161 | } |
| 162 | |
| 163 | void shrink_to_fit() { |
| 164 | start_.shrink_to_fit(); |
| 165 | char_.shrink_to_fit(); |
| 166 | } |
| 167 | |
| 168 | private: |
| 169 | std::vector<int> start_; |
| 170 | size_t nextStart_{0}; |
| 171 | std::vector<char> char_; |
| 172 | }; |
| 173 | |
| 174 | // Defines a macro |
| 175 | class Definition { |
| 176 | public: |
| 177 | Definition(const TokenSequence &, size_t firstToken, size_t tokens); |
| 178 | Definition(const std::vector<std::string> &argNames, const TokenSequence &, |
| 179 | size_t firstToken, size_t tokens); |
| 180 | |
| 181 | bool isFunctionLike() const { return isFunctionLike_; } |
| 182 | size_t argumentCount() const { return argumentCount_; } |
| 183 | bool isVariadic() const { return isVariadic_; } |
| 184 | bool isDisabled() const { return isDisabled_; } |
| 185 | const TokenSequence &replacement() const { return replacement_; } |
| 186 | |
| 187 | bool set_isDisabled(bool disable); |
| 188 | |
| 189 | TokenSequence Apply(const std::vector<TokenSequence> &args); |
| 190 | |
| 191 | private: |
| 192 | static TokenSequence Tokenize(const std::vector<std::string> &argNames, |
| 193 | const TokenSequence &token, size_t firstToken, |
| 194 | size_t tokens); |
| 195 | |
| 196 | bool isFunctionLike_{false}; |
| 197 | size_t argumentCount_{0}; |
| 198 | bool isVariadic_{false}; |
| 199 | bool isDisabled_{false}; |
| 200 | TokenSequence replacement_; |
| 201 | }; |
| 202 | |
| 203 | // Preprocessing state |
| 204 | class Preprocessor { |
| 205 | public: |
| 206 | Preprocessor(Prescanner *ps) : prescanner_{ps} {} |
| 207 | |
| 208 | // When the input contains macros to be replaced, the new token sequence |
| 209 | // is appended to the output and the returned value is true. When |
| 210 | // no macro replacement is necessary, the output is unmodified and the |
| 211 | // return value is false. |
| 212 | bool MacroReplacement(const TokenSequence &, TokenSequence *); |
| 213 | |
| 214 | // Implements a preprocessor directive; returns an error message, or an |
| 215 | // empty string when successful. |
| 216 | std::string Directive(const TokenSequence &); |
| 217 | |
| 218 | private: |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 219 | enum class IsElseActive { No, Yes }; |
| 220 | enum class CanDeadElseAppear { No, Yes }; |
| 221 | bool IsNameDefined(const CharPointerWithLength &); |
| 222 | std::string SkipDisabledConditionalCode(const std::string &dirName, |
| 223 | IsElseActive); |
| 224 | bool IsIfPredicateTrue(const TokenSequence &expr, size_t first, |
| 225 | size_t exprTokens, std::string *errors); |
| 226 | |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 227 | std::list<std::string> names_; |
| 228 | std::unordered_map<CharPointerWithLength, Definition> definitions_; |
peter klausler | 93cf3ae | 2018-02-01 20:08:02 | [diff] [blame] | 229 | std::stack<CanDeadElseAppear> ifStack_; |
peter klausler | 90cd9ba | 2018-01-30 19:54:31 | [diff] [blame] | 230 | Prescanner *prescanner_; |
| 231 | }; |
| 232 | } // namespace Fortran |
| 233 | #endif // FORTRAN_PREPROCESSOR_H_ |