blob: eb9de4775c4c3fd54182d3586b3c5da1e506b2d2 [file] [log] [blame]
peter klausler90cd9ba2018-01-30 19:54:311#ifndef FORTRAN_PREPROCESSOR_H_
2#define FORTRAN_PREPROCESSOR_H_
3
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement.  Intended to be efficient
// enough to always run on all source files even when no preprocessing is
// needed, so that special compiler command options and/or source file name
// extensions for preprocessing will not be necessary.
9
#include "idioms.h"
#include <cctype>
#include <cstddef>
#include <cstring>
#include <functional>
#include <list>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
19
20namespace Fortran {
21
22class CharBuffer;
23class Prescanner;
24
// Just a const char pointer with an associated length; does not own the
// referenced data, so the viewed storage must outlive the view.  Used to
// describe buffered tokens and hash table keys.
class CharPointerWithLength {
 public:
  // An empty view: null pointer, zero length.
  CharPointerWithLength() = default;
  // Views the n bytes that begin at x; nothing is copied.
  CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
  // Copies duplicate only the pointer and the length, which is exactly what
  // the compiler-generated operations do, so they are defaulted rather than
  // spelled out by hand.
  CharPointerWithLength(const CharPointerWithLength &) = default;
  CharPointerWithLength &operator=(const CharPointerWithLength &) = default;

  // True when the view covers zero bytes.
  bool empty() const { return bytes_ == 0; }
  // Number of bytes covered by the view.
  size_t size() const { return bytes_; }
  // Pointer to the first viewed byte; null for a default-constructed view.
  const char *data() const { return data_; }
  // Unchecked element access: the caller must ensure j < size().
  const char &operator[](size_t j) const { return data_[j]; }

 private:
  const char *data_{nullptr};
  size_t bytes_{0};
};
48} // namespace Fortran
49
50// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
51template<> struct std::hash<Fortran::CharPointerWithLength> {
52 size_t operator()(const Fortran::CharPointerWithLength &x) const {
53 size_t hash{0};
peter klausler33d78542018-01-30 20:21:2554 const char *p{x.data()}, *limit{p + x.size()};
peter klausler90cd9ba2018-01-30 19:54:3155 for (; p < limit; ++p) {
56 hash = (hash * 31) ^ *p;
57 }
58 return hash;
59 }
60};
61
62template<> struct std::equal_to<Fortran::CharPointerWithLength> {
63 bool operator()(const Fortran::CharPointerWithLength &x,
64 const Fortran::CharPointerWithLength &y) const {
peter klausler33d78542018-01-30 20:21:2565 return x.size() == y.size() &&
66 std::memcmp(static_cast<const void *>(x.data()),
67 static_cast<const void *>(y.data()),
68 x.size()) == 0;
peter klausler90cd9ba2018-01-30 19:54:3169 }
70};
71
72namespace Fortran {
73
74// Buffers a contiguous sequence of characters that has been partitioned into
75// a sequence of preprocessing tokens.
76class TokenSequence {
77 public:
78 TokenSequence() {}
79 TokenSequence(TokenSequence &&that)
80 : start_{std::move(that.start_)}, nextStart_{that.nextStart_},
81 char_{std::move(that.char_)} {}
82 TokenSequence &operator=(TokenSequence &&that) {
83 start_ = std::move(that.start_);
84 nextStart_ = that.nextStart_;
85 char_ = std::move(that.char_);
86 return *this;
87 }
88
89 size_t GetBytes(size_t token) const {
90 return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
91 start_[token];
92 }
93 const char *GetText(size_t token) const {
94 return &char_[start_[token]];
95 }
96 std::string GetString(size_t token) const {
97 return std::string(GetText(token), GetBytes(token));
98 }
99 CharPointerWithLength GetToken(size_t token) const {
100 return {GetText(token), GetBytes(token)};
101 }
102
103 void AddChar(char ch) {
104 char_.emplace_back(ch);
105 }
peter klausler6f94e842018-01-30 23:22:26106
peter klausler90cd9ba2018-01-30 19:54:31107 void EndToken() {
108 // CHECK(char_.size() > nextStart_);
109 start_.emplace_back(nextStart_);
110 nextStart_ = char_.size();
111 }
112
peter klausler6f94e842018-01-30 23:22:26113 void ReopenLastToken() {
114 nextStart_ = start_.back();
115 start_.pop_back();
116 }
117
peter klausler90cd9ba2018-01-30 19:54:31118 void Append(const TokenSequence &);
119
peter klausler6f94e842018-01-30 23:22:26120 void EmitWithCaseConversion(CharBuffer *);
peter klausler90cd9ba2018-01-30 19:54:31121
122 bool empty() const { return start_.empty(); }
123
124 size_t size() const { return start_.size(); }
125
peter klausler6f94e842018-01-30 23:22:26126 const char *data() const { return &char_[0]; }
127
peter klausler90cd9ba2018-01-30 19:54:31128 void clear() {
129 start_.clear();
130 nextStart_ = 0;
131 char_.clear();
132 }
133
134 void pop_back() {
135 nextStart_ = start_.back();
136 start_.pop_back();
137 char_.resize(nextStart_);
138 }
139
140 void push_back(const char *s, size_t bytes) {
141 for (size_t j{0}; j < bytes; ++j) {
142 AddChar(s[j]);
143 }
144 EndToken();
145 }
146
147 void push_back(const CharPointerWithLength &t) {
peter klausler33d78542018-01-30 20:21:25148 size_t bytes{t.size()};
149 for (size_t j{0}; j < bytes; ++j) {
peter klausler90cd9ba2018-01-30 19:54:31150 AddChar(t[j]);
151 }
152 EndToken();
153 }
154
155 void push_back(const std::string &s) {
156 size_t bytes{s.size()};
157 for (size_t j{0}; j < bytes; ++j) {
158 AddChar(s[j]);
159 }
160 EndToken();
161 }
162
163 void shrink_to_fit() {
164 start_.shrink_to_fit();
165 char_.shrink_to_fit();
166 }
167
168 private:
169 std::vector<int> start_;
170 size_t nextStart_{0};
171 std::vector<char> char_;
172};
173
174// Defines a macro
175class Definition {
176 public:
177 Definition(const TokenSequence &, size_t firstToken, size_t tokens);
178 Definition(const std::vector<std::string> &argNames, const TokenSequence &,
179 size_t firstToken, size_t tokens);
180
181 bool isFunctionLike() const { return isFunctionLike_; }
182 size_t argumentCount() const { return argumentCount_; }
183 bool isVariadic() const { return isVariadic_; }
184 bool isDisabled() const { return isDisabled_; }
185 const TokenSequence &replacement() const { return replacement_; }
186
187 bool set_isDisabled(bool disable);
188
189 TokenSequence Apply(const std::vector<TokenSequence> &args);
190
191 private:
192 static TokenSequence Tokenize(const std::vector<std::string> &argNames,
193 const TokenSequence &token, size_t firstToken,
194 size_t tokens);
195
196 bool isFunctionLike_{false};
197 size_t argumentCount_{0};
198 bool isVariadic_{false};
199 bool isDisabled_{false};
200 TokenSequence replacement_;
201};
202
203// Preprocessing state
204class Preprocessor {
205 public:
206 Preprocessor(Prescanner *ps) : prescanner_{ps} {}
207
208 // When the input contains macros to be replaced, the new token sequence
209 // is appended to the output and the returned value is true. When
210 // no macro replacement is necessary, the output is unmodified and the
211 // return value is false.
212 bool MacroReplacement(const TokenSequence &, TokenSequence *);
213
214 // Implements a preprocessor directive; returns an error message, or an
215 // empty string when successful.
216 std::string Directive(const TokenSequence &);
217
218 private:
peter klausler93cf3ae2018-02-01 20:08:02219 enum class IsElseActive { No, Yes };
220 enum class CanDeadElseAppear { No, Yes };
221 bool IsNameDefined(const CharPointerWithLength &);
222 std::string SkipDisabledConditionalCode(const std::string &dirName,
223 IsElseActive);
224 bool IsIfPredicateTrue(const TokenSequence &expr, size_t first,
225 size_t exprTokens, std::string *errors);
226
peter klausler90cd9ba2018-01-30 19:54:31227 std::list<std::string> names_;
228 std::unordered_map<CharPointerWithLength, Definition> definitions_;
peter klausler93cf3ae2018-02-01 20:08:02229 std::stack<CanDeadElseAppear> ifStack_;
peter klausler90cd9ba2018-01-30 19:54:31230 Prescanner *prescanner_;
231};
232} // namespace Fortran
233#endif // FORTRAN_PREPROCESSOR_H_