blob: ad9fee11cfec65cee437c37a519ea9a56dc901da [file] [log] [blame]
peter klausler90cd9ba2018-01-30 19:54:311#ifndef FORTRAN_PREPROCESSOR_H_
2#define FORTRAN_PREPROCESSOR_H_
3
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement.  Intended to be efficient
// enough to always run on all source files even when no preprocessing is
// needed, so that special compiler command options and/or source file name
// extensions for preprocessing will not be necessary.
9
#include "idioms.h"
#include <cctype>
#include <cstddef>
#include <cstring>
#include <functional>
#include <list>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
20
21namespace Fortran {
22
23class CharBuffer;
24class Prescanner;
25
// Just a const char pointer with an associated length; does not own the
// referenced data.  Used to describe buffered tokens and hash table keys.
// Because nothing is owned, the referenced bytes must outlive every use of
// the view; a view built from a temporary std::string dangles as soon as
// that temporary is destroyed.
class CharPointerWithLength {
 public:
  // An empty view: null data pointer and zero length.
  CharPointerWithLength() = default;
  // Views the n bytes that begin at x.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
  // Views the current contents of s.  Not explicit, so a std::string
  // converts implicitly wherever a view is expected.
  CharPointerWithLength(const std::string &s)
    : data_{s.data()}, bytes_{s.size()} {}
  // Copy construction and copy assignment are the compiler-generated
  // memberwise copies of the pointer and the length.

  bool empty() const { return bytes_ == 0; }
  size_t size() const { return bytes_; }
  const char *data() const { return data_; }
  // Unchecked access to byte j; j must be less than size().
  const char &operator[](size_t j) const { return data_[j]; }

  // Copies the viewed bytes into a new, owning std::string.
  std::string ToString() const { return std::string{data_, bytes_}; }

 private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
53} // namespace Fortran
54
55// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
56template<> struct std::hash<Fortran::CharPointerWithLength> {
57 size_t operator()(const Fortran::CharPointerWithLength &x) const {
58 size_t hash{0};
peter klausler33d78542018-01-30 20:21:2559 const char *p{x.data()}, *limit{p + x.size()};
peter klausler90cd9ba2018-01-30 19:54:3160 for (; p < limit; ++p) {
61 hash = (hash * 31) ^ *p;
62 }
63 return hash;
64 }
65};
66
67template<> struct std::equal_to<Fortran::CharPointerWithLength> {
68 bool operator()(const Fortran::CharPointerWithLength &x,
69 const Fortran::CharPointerWithLength &y) const {
peter klausler33d78542018-01-30 20:21:2570 return x.size() == y.size() &&
71 std::memcmp(static_cast<const void *>(x.data()),
72 static_cast<const void *>(y.data()),
73 x.size()) == 0;
peter klausler90cd9ba2018-01-30 19:54:3174 }
75};
76
77namespace Fortran {
78
79// Buffers a contiguous sequence of characters that has been partitioned into
80// a sequence of preprocessing tokens.
81class TokenSequence {
82 public:
83 TokenSequence() {}
peter klauslerad7125f2018-02-01 23:01:2384 TokenSequence(const TokenSequence &that) { Append(that); }
peter klausler90cd9ba2018-01-30 19:54:3185 TokenSequence(TokenSequence &&that)
86 : start_{std::move(that.start_)}, nextStart_{that.nextStart_},
87 char_{std::move(that.char_)} {}
peter klauslerad7125f2018-02-01 23:01:2388 TokenSequence(const std::string &s) { push_back(s); }
89
90 TokenSequence &operator=(const TokenSequence &that) {
91 clear();
92 Append(that);
93 return *this;
94 }
peter klausler90cd9ba2018-01-30 19:54:3195 TokenSequence &operator=(TokenSequence &&that) {
96 start_ = std::move(that.start_);
97 nextStart_ = that.nextStart_;
98 char_ = std::move(that.char_);
99 return *this;
100 }
101
102 size_t GetBytes(size_t token) const {
103 return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
104 start_[token];
105 }
106 const char *GetText(size_t token) const {
107 return &char_[start_[token]];
108 }
109 std::string GetString(size_t token) const {
110 return std::string(GetText(token), GetBytes(token));
111 }
peter klauslerad7125f2018-02-01 23:01:23112 CharPointerWithLength operator[](size_t token) const {
peter klausler90cd9ba2018-01-30 19:54:31113 return {GetText(token), GetBytes(token)};
114 }
115
116 void AddChar(char ch) {
117 char_.emplace_back(ch);
118 }
peter klausler6f94e842018-01-30 23:22:26119
peter klausler90cd9ba2018-01-30 19:54:31120 void EndToken() {
121 // CHECK(char_.size() > nextStart_);
122 start_.emplace_back(nextStart_);
123 nextStart_ = char_.size();
124 }
125
peter klausler6f94e842018-01-30 23:22:26126 void ReopenLastToken() {
127 nextStart_ = start_.back();
128 start_.pop_back();
129 }
130
peter klausler90cd9ba2018-01-30 19:54:31131 void Append(const TokenSequence &);
peter klausler6f94e842018-01-30 23:22:26132 void EmitWithCaseConversion(CharBuffer *);
peter klausler90cd9ba2018-01-30 19:54:31133
134 bool empty() const { return start_.empty(); }
peter klausler90cd9ba2018-01-30 19:54:31135 size_t size() const { return start_.size(); }
peter klausler6f94e842018-01-30 23:22:26136 const char *data() const { return &char_[0]; }
137
peter klausler90cd9ba2018-01-30 19:54:31138 void clear() {
139 start_.clear();
140 nextStart_ = 0;
141 char_.clear();
142 }
143
peter klausler90cd9ba2018-01-30 19:54:31144 void push_back(const char *s, size_t bytes) {
145 for (size_t j{0}; j < bytes; ++j) {
146 AddChar(s[j]);
147 }
148 EndToken();
149 }
150
151 void push_back(const CharPointerWithLength &t) {
peter klauslerad7125f2018-02-01 23:01:23152 push_back(t.data(), t.size());
peter klausler90cd9ba2018-01-30 19:54:31153 }
154
peter klauslerad7125f2018-02-01 23:01:23155#if 0
peter klausler90cd9ba2018-01-30 19:54:31156 void push_back(const std::string &s) {
157 size_t bytes{s.size()};
158 for (size_t j{0}; j < bytes; ++j) {
159 AddChar(s[j]);
160 }
161 EndToken();
162 }
peter klauslerad7125f2018-02-01 23:01:23163#endif
164
165 void push_back(const std::stringstream &ss) { push_back(ss.str()); }
166
167 void pop_back() {
168 nextStart_ = start_.back();
169 start_.pop_back();
170 char_.resize(nextStart_);
171 }
peter klausler90cd9ba2018-01-30 19:54:31172
173 void shrink_to_fit() {
174 start_.shrink_to_fit();
175 char_.shrink_to_fit();
176 }
177
178 private:
179 std::vector<int> start_;
180 size_t nextStart_{0};
181 std::vector<char> char_;
182};
183
184// Defines a macro
185class Definition {
186 public:
187 Definition(const TokenSequence &, size_t firstToken, size_t tokens);
188 Definition(const std::vector<std::string> &argNames, const TokenSequence &,
189 size_t firstToken, size_t tokens);
peter klauslerad7125f2018-02-01 23:01:23190 explicit Definition(const std::string &predefined);
peter klausler90cd9ba2018-01-30 19:54:31191
192 bool isFunctionLike() const { return isFunctionLike_; }
193 size_t argumentCount() const { return argumentCount_; }
194 bool isVariadic() const { return isVariadic_; }
195 bool isDisabled() const { return isDisabled_; }
peter klauslerad7125f2018-02-01 23:01:23196 bool isPredefined() const { return isPredefined_; }
peter klausler90cd9ba2018-01-30 19:54:31197 const TokenSequence &replacement() const { return replacement_; }
198
199 bool set_isDisabled(bool disable);
200
201 TokenSequence Apply(const std::vector<TokenSequence> &args);
202
203 private:
204 static TokenSequence Tokenize(const std::vector<std::string> &argNames,
205 const TokenSequence &token, size_t firstToken,
206 size_t tokens);
207
208 bool isFunctionLike_{false};
209 size_t argumentCount_{0};
210 bool isVariadic_{false};
211 bool isDisabled_{false};
peter klauslerad7125f2018-02-01 23:01:23212 bool isPredefined_{false};
peter klausler90cd9ba2018-01-30 19:54:31213 TokenSequence replacement_;
214};
215
216// Preprocessing state
217class Preprocessor {
218 public:
peter klauslerad7125f2018-02-01 23:01:23219 explicit Preprocessor(Prescanner &);
peter klausler90cd9ba2018-01-30 19:54:31220
221 // When the input contains macros to be replaced, the new token sequence
222 // is appended to the output and the returned value is true. When
223 // no macro replacement is necessary, the output is unmodified and the
224 // return value is false.
225 bool MacroReplacement(const TokenSequence &, TokenSequence *);
226
227 // Implements a preprocessor directive; returns an error message, or an
228 // empty string when successful.
229 std::string Directive(const TokenSequence &);
230
231 private:
peter klausler93cf3ae2018-02-01 20:08:02232 enum class IsElseActive { No, Yes };
233 enum class CanDeadElseAppear { No, Yes };
peter klauslerad7125f2018-02-01 23:01:23234
235 CharPointerWithLength SaveToken(const CharPointerWithLength &);
peter klausler93cf3ae2018-02-01 20:08:02236 bool IsNameDefined(const CharPointerWithLength &);
peter klauslerad7125f2018-02-01 23:01:23237 TokenSequence ReplaceMacros(const TokenSequence &);
peter klausler93cf3ae2018-02-01 20:08:02238 std::string SkipDisabledConditionalCode(const std::string &dirName,
239 IsElseActive);
240 bool IsIfPredicateTrue(const TokenSequence &expr, size_t first,
241 size_t exprTokens, std::string *errors);
242
peter klauslerad7125f2018-02-01 23:01:23243 Prescanner &prescanner_;
peter klausler90cd9ba2018-01-30 19:54:31244 std::list<std::string> names_;
245 std::unordered_map<CharPointerWithLength, Definition> definitions_;
peter klausler93cf3ae2018-02-01 20:08:02246 std::stack<CanDeadElseAppear> ifStack_;
peter klausler90cd9ba2018-01-30 19:54:31247};
248} // namespace Fortran
249#endif // FORTRAN_PREPROCESSOR_H_