blob: 29f8d1b3266d4fb55f2d8b4f6824c20e5be968c4 [file] [log] [blame]
peter klausler90cd9ba2018-01-30 19:54:311#ifndef FORTRAN_PREPROCESSOR_H_
2#define FORTRAN_PREPROCESSOR_H_
3
// A Fortran-aware preprocessing module used by the prescanner to implement
// preprocessing directives and macro replacement. Intended to be efficient
// enough to always run on all source files, even when no preprocessing is
// needed, so that special compiler command-line options and/or source file
// name extensions for preprocessing will not be necessary.
9
#include "idioms.h"
#include <cctype>
#include <cstddef>
#include <cstring>
#include <functional>
#include <list>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
20
21namespace Fortran {
22
23class CharBuffer;
24class Prescanner;
25
26// Just a const char pointer with an associated length; does not own the
27// referenced data. Used to describe buffered tokens and hash table keys.
peter klausler33d78542018-01-30 20:21:2528class CharPointerWithLength {
29 public:
peter klausler90cd9ba2018-01-30 19:54:3130 CharPointerWithLength() {}
peter klausler33d78542018-01-30 20:21:2531 CharPointerWithLength(const char *x, size_t n) : data_{x}, bytes_{n} {}
peter klauslerad7125f2018-02-01 23:01:2332 CharPointerWithLength(const std::string &s)
33 : data_{s.data()}, bytes_{s.size()} {}
peter klausler90cd9ba2018-01-30 19:54:3134 CharPointerWithLength(const CharPointerWithLength &that)
peter klausler33d78542018-01-30 20:21:2535 : data_{that.data_}, bytes_{that.bytes_} {}
peter klausler90cd9ba2018-01-30 19:54:3136 CharPointerWithLength &operator=(const CharPointerWithLength &that) {
peter klausler33d78542018-01-30 20:21:2537 data_ = that.data_;
38 bytes_ = that.bytes_;
peter klausler90cd9ba2018-01-30 19:54:3139 return *this;
40 }
41
peter klausler33d78542018-01-30 20:21:2542 bool empty() const { return bytes_ == 0; }
43 size_t size() const { return bytes_; }
44 const char *data() const { return data_; }
45 const char &operator[](size_t j) const { return data_[j]; }
peter klausler90cd9ba2018-01-30 19:54:3146
peter klausler55fe4d22018-02-02 23:52:4347 bool IsBlank() const;
peter klauslerad7125f2018-02-01 23:01:2348 std::string ToString() const { return std::string{data_, bytes_}; }
49
peter klausler33d78542018-01-30 20:21:2550 private:
51 const char *data_{nullptr};
52 size_t bytes_{0};
peter klausler90cd9ba2018-01-30 19:54:3153};
54} // namespace Fortran
55
56// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
57template<> struct std::hash<Fortran::CharPointerWithLength> {
58 size_t operator()(const Fortran::CharPointerWithLength &x) const {
59 size_t hash{0};
peter klausler33d78542018-01-30 20:21:2560 const char *p{x.data()}, *limit{p + x.size()};
peter klausler90cd9ba2018-01-30 19:54:3161 for (; p < limit; ++p) {
62 hash = (hash * 31) ^ *p;
63 }
64 return hash;
65 }
66};
67
68template<> struct std::equal_to<Fortran::CharPointerWithLength> {
69 bool operator()(const Fortran::CharPointerWithLength &x,
70 const Fortran::CharPointerWithLength &y) const {
peter klausler33d78542018-01-30 20:21:2571 return x.size() == y.size() &&
72 std::memcmp(static_cast<const void *>(x.data()),
73 static_cast<const void *>(y.data()),
74 x.size()) == 0;
peter klausler90cd9ba2018-01-30 19:54:3175 }
76};
77
78namespace Fortran {
79
80// Buffers a contiguous sequence of characters that has been partitioned into
81// a sequence of preprocessing tokens.
82class TokenSequence {
83 public:
84 TokenSequence() {}
peter klauslerad7125f2018-02-01 23:01:2385 TokenSequence(const TokenSequence &that) { Append(that); }
peter klausler90cd9ba2018-01-30 19:54:3186 TokenSequence(TokenSequence &&that)
87 : start_{std::move(that.start_)}, nextStart_{that.nextStart_},
88 char_{std::move(that.char_)} {}
peter klauslerad7125f2018-02-01 23:01:2389 TokenSequence(const std::string &s) { push_back(s); }
90
91 TokenSequence &operator=(const TokenSequence &that) {
92 clear();
93 Append(that);
94 return *this;
95 }
peter klausler90cd9ba2018-01-30 19:54:3196 TokenSequence &operator=(TokenSequence &&that) {
97 start_ = std::move(that.start_);
98 nextStart_ = that.nextStart_;
99 char_ = std::move(that.char_);
100 return *this;
101 }
102
peter klauslerad7125f2018-02-01 23:01:23103 CharPointerWithLength operator[](size_t token) const {
peter klausler55fe4d22018-02-02 23:52:43104 return {&char_[start_[token]],
105 (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
106 start_[token]};
peter klausler90cd9ba2018-01-30 19:54:31107 }
108
109 void AddChar(char ch) {
110 char_.emplace_back(ch);
111 }
peter klausler6f94e842018-01-30 23:22:26112
peter klausler90cd9ba2018-01-30 19:54:31113 void EndToken() {
114 // CHECK(char_.size() > nextStart_);
115 start_.emplace_back(nextStart_);
116 nextStart_ = char_.size();
117 }
118
peter klausler6f94e842018-01-30 23:22:26119 void ReopenLastToken() {
120 nextStart_ = start_.back();
121 start_.pop_back();
122 }
123
peter klausler90cd9ba2018-01-30 19:54:31124 void Append(const TokenSequence &);
peter klausler55fe4d22018-02-02 23:52:43125 void EmitWithCaseConversion(CharBuffer *) const;
126 std::string ToString() const;
peter klausler90cd9ba2018-01-30 19:54:31127
128 bool empty() const { return start_.empty(); }
peter klausler90cd9ba2018-01-30 19:54:31129 size_t size() const { return start_.size(); }
peter klausler6f94e842018-01-30 23:22:26130 const char *data() const { return &char_[0]; }
131
peter klausler90cd9ba2018-01-30 19:54:31132 void clear() {
133 start_.clear();
134 nextStart_ = 0;
135 char_.clear();
136 }
137
peter klausler90cd9ba2018-01-30 19:54:31138 void push_back(const char *s, size_t bytes) {
139 for (size_t j{0}; j < bytes; ++j) {
140 AddChar(s[j]);
141 }
142 EndToken();
143 }
144
145 void push_back(const CharPointerWithLength &t) {
peter klauslerad7125f2018-02-01 23:01:23146 push_back(t.data(), t.size());
peter klausler90cd9ba2018-01-30 19:54:31147 }
148
peter klauslerad7125f2018-02-01 23:01:23149 void push_back(const std::stringstream &ss) { push_back(ss.str()); }
150
151 void pop_back() {
152 nextStart_ = start_.back();
153 start_.pop_back();
154 char_.resize(nextStart_);
155 }
peter klausler90cd9ba2018-01-30 19:54:31156
157 void shrink_to_fit() {
158 start_.shrink_to_fit();
159 char_.shrink_to_fit();
160 }
161
162 private:
163 std::vector<int> start_;
164 size_t nextStart_{0};
165 std::vector<char> char_;
166};
167
168// Defines a macro
169class Definition {
170 public:
171 Definition(const TokenSequence &, size_t firstToken, size_t tokens);
172 Definition(const std::vector<std::string> &argNames, const TokenSequence &,
173 size_t firstToken, size_t tokens);
peter klauslerad7125f2018-02-01 23:01:23174 explicit Definition(const std::string &predefined);
peter klausler90cd9ba2018-01-30 19:54:31175
176 bool isFunctionLike() const { return isFunctionLike_; }
177 size_t argumentCount() const { return argumentCount_; }
178 bool isVariadic() const { return isVariadic_; }
179 bool isDisabled() const { return isDisabled_; }
peter klauslerad7125f2018-02-01 23:01:23180 bool isPredefined() const { return isPredefined_; }
peter klausler90cd9ba2018-01-30 19:54:31181 const TokenSequence &replacement() const { return replacement_; }
182
183 bool set_isDisabled(bool disable);
184
185 TokenSequence Apply(const std::vector<TokenSequence> &args);
186
187 private:
188 static TokenSequence Tokenize(const std::vector<std::string> &argNames,
189 const TokenSequence &token, size_t firstToken,
190 size_t tokens);
191
192 bool isFunctionLike_{false};
193 size_t argumentCount_{0};
194 bool isVariadic_{false};
195 bool isDisabled_{false};
peter klauslerad7125f2018-02-01 23:01:23196 bool isPredefined_{false};
peter klausler90cd9ba2018-01-30 19:54:31197 TokenSequence replacement_;
198};
199
200// Preprocessing state
201class Preprocessor {
202 public:
peter klauslerad7125f2018-02-01 23:01:23203 explicit Preprocessor(Prescanner &);
peter klausler90cd9ba2018-01-30 19:54:31204
205 // When the input contains macros to be replaced, the new token sequence
206 // is appended to the output and the returned value is true. When
207 // no macro replacement is necessary, the output is unmodified and the
208 // return value is false.
209 bool MacroReplacement(const TokenSequence &, TokenSequence *);
210
peter klausler55fe4d22018-02-02 23:52:43211 // Implements a preprocessor directive; returns true when no fatal error.
212 bool Directive(const TokenSequence &);
peter klausler90cd9ba2018-01-30 19:54:31213
214 private:
peter klausler93cf3ae2018-02-01 20:08:02215 enum class IsElseActive { No, Yes };
216 enum class CanDeadElseAppear { No, Yes };
peter klauslerad7125f2018-02-01 23:01:23217
peter klausler55fe4d22018-02-02 23:52:43218 void Complain(const std::string &);
peter klauslerad7125f2018-02-01 23:01:23219 CharPointerWithLength SaveToken(const CharPointerWithLength &);
peter klausler93cf3ae2018-02-01 20:08:02220 bool IsNameDefined(const CharPointerWithLength &);
peter klauslerad7125f2018-02-01 23:01:23221 TokenSequence ReplaceMacros(const TokenSequence &);
peter klausler55fe4d22018-02-02 23:52:43222 bool SkipDisabledConditionalCode(const std::string &dirName, IsElseActive);
peter klausler93cf3ae2018-02-01 20:08:02223 bool IsIfPredicateTrue(const TokenSequence &expr, size_t first,
peter klausler55fe4d22018-02-02 23:52:43224 size_t exprTokens);
peter klausler93cf3ae2018-02-01 20:08:02225
peter klauslerad7125f2018-02-01 23:01:23226 Prescanner &prescanner_;
peter klausler90cd9ba2018-01-30 19:54:31227 std::list<std::string> names_;
228 std::unordered_map<CharPointerWithLength, Definition> definitions_;
peter klausler93cf3ae2018-02-01 20:08:02229 std::stack<CanDeadElseAppear> ifStack_;
peter klausler90cd9ba2018-01-30 19:54:31230};
231} // namespace Fortran
232#endif // FORTRAN_PREPROCESSOR_H_