[flang] Preprocessor work: f18 now passes all of my tests except those for #if (expression)
and file inclusion.
Original-commit: flang-compiler/f18@07ebac21f543a224a8f426faf206f7525899081e
diff --git a/flang/char-buffer.cc b/flang/char-buffer.cc
index 66be6eb..b678500 100644
--- a/flang/char-buffer.cc
+++ b/flang/char-buffer.cc
@@ -40,17 +40,6 @@
Put(str.data(), str.size());
}
-void CharBuffer::Put(const std::vector<char> &data) {
- size_t n{data.size()};
- size_t chunk;
- for (size_t at{0}; at < n; at += chunk) {
- char *to{FreeSpace(&chunk)};
- chunk = std::min(n - at, chunk);
- Claim(chunk);
- std::memcpy(to, &data[at], chunk);
- }
-}
-
void CharBuffer::CopyToContiguous(char *data) {
char *to{data};
for (char ch : *this) {
diff --git a/flang/char-buffer.h b/flang/char-buffer.h
index 743bb24..84ee928 100644
--- a/flang/char-buffer.h
+++ b/flang/char-buffer.h
@@ -41,7 +41,6 @@
void Claim(size_t);
void Put(const char *data, size_t n);
void Put(const std::string &);
- void Put(const std::vector<char> &);
void Put(char x) { Put(&x, 1); }
void CopyToContiguous(char *data);
diff --git a/flang/preprocessor.cc b/flang/preprocessor.cc
index c2dbeaf1..c01a83b 100644
--- a/flang/preprocessor.cc
+++ b/flang/preprocessor.cc
@@ -2,10 +2,12 @@
#include "char-buffer.h"
#include "idioms.h"
#include "prescan.h"
+#include <cctype>
#include <map>
#include <memory>
#include <set>
#include <utility>
+#include <iostream> // TODO pmk rm
namespace Fortran {
@@ -21,8 +23,21 @@
nextStart_ = char_.size();
}
-void TokenSequence::Emit(CharBuffer *out) {
- out->Put(char_);
+// Writes every stored character to *out, one token-sized chunk at a time.
+// A chunk whose first character is a letter is folded to lower case;
+// every other chunk is copied through unchanged.
+void TokenSequence::EmitWithCaseConversion(CharBuffer *out) {
+  size_t tokens{start_.size()};
+  size_t chars{char_.size()};
+  size_t atToken{0};
+  for (size_t j{0}; j < chars;) {
+    // The chunk ends where the next token starts, or at the end of the data.
+    size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
+    // The <cctype> functions have undefined behavior for negative arguments
+    // other than EOF, so plain char values go through unsigned char first.
+    if (isalpha(static_cast<unsigned char>(char_[j]))) {
+      for (; j < nextStart; ++j) {
+        out->Put(
+            static_cast<char>(tolower(static_cast<unsigned char>(char_[j]))));
+      }
+    } else {
+      out->Put(&char_[j], nextStart - j);
+      j = nextStart;
+    }
+  }
}
Definition::Definition(const TokenSequence &repl, size_t firstToken,
@@ -51,36 +66,18 @@
args[arg] = "~"s + argIndex++;
}
TokenSequence result;
- bool pasting{false};
for (size_t j{0}; j < tokens; ++j) {
size_t bytes{token.GetBytes(firstToken + j)};
if (bytes == 0) {
continue;
}
const char *text{token.GetText(firstToken + j)};
- if (bytes == 2 && text[0] == '#' && text[1] == '#') {
- for (size_t rtc{result.size()};
- rtc > 0 && (result.GetBytes(rtc-1) == 0 ||
- *result.GetText(rtc-1) == ' ');
- --rtc) {
- result.pop_back();
- }
- pasting = true;
- continue;
- }
- if (*text == ' ') {
- if (pasting) {
+ if (bytes > 0 && (*text == '_' || isalpha(*text))) {
+ auto it = args.find(token.GetString(firstToken + j));
+ if (it != args.end()) {
+ result.push_back(it->second);
continue;
}
- } else {
- pasting = false;
- if (bytes > 0 && (*text == '_' || isalpha(*text))) {
- auto it = args.find(token.GetString(firstToken + j));
- if (it != args.end()) {
- result.push_back(it->second);
- continue;
- }
- }
}
result.push_back(text, bytes);
}
@@ -89,7 +86,7 @@
TokenSequence Definition::Apply(const std::vector<TokenSequence> &args) {
TokenSequence result;
- bool stringify{false};
+ bool stringify{false}, pasting{false};
size_t tokens{replacement_.size()};
for (size_t j{0}; j < tokens; ++j) {
size_t bytes{replacement_.GetBytes(j)};
@@ -118,12 +115,34 @@
result.push_back(strung);
} else {
for (size_t k{0}; k < argTokens; ++k) {
- result.push_back(args[index].GetText(k), args[index].GetBytes(k));
+ const char *text{args[index].GetText(k)};
+ size_t bytes{args[index].GetBytes(k)};
+ if (pasting && (bytes == 0 || *text == ' ' || *text == '\t')) {
+ } else {
+ result.push_back(text, bytes);
+ pasting = false;
+ }
}
}
+ } else if (bytes == 2 && text[0] == '#' && text[1] == '#') {
+ // Token pasting operator in body (not expanded argument); discard any
+ // immediately preceding white space, then reopen the last token.
+ while (!result.empty() &&
+ (result.GetBytes(result.size() - 1) == 0 ||
+ *result.GetText(result.size() - 1) == ' ' ||
+ *result.GetText(result.size() - 1) == '\t')) {
+ result.pop_back();
+ }
+ if (!result.empty()) {
+ result.ReopenLastToken();
+ pasting = true;
+ }
+ } else if (pasting && (bytes == 0 || *text == ' ' || *text == '\t')) {
+ // Delete whitespace immediately following ## in the body.
} else {
stringify = bytes == 1 && *text == '#';
result.push_back(text, bytes);
+ pasting = false;
}
}
return result;
@@ -225,7 +244,7 @@
actual.push_back(input.GetText(at), input.GetBytes(at));
}
TokenSequence arg;
- if (!MacroReplacement(actual, &arg)) {
+ if (true /*pmk?*/ || !MacroReplacement(actual, &arg)) {
args.emplace_back(std::move(actual));
} else {
args.emplace_back(std::move(arg));
@@ -279,6 +298,9 @@
return ""s; // TODO: treat as #line
}
std::string dirName{dir.GetString(j)};
+ for (char &ch : dirName) {
+ ch = tolower(ch);
+ }
j = SkipBlanks(dir, j + 1);
std::string nameString;
CharPointerWithLength nameToken;
@@ -331,7 +353,6 @@
definitions_.emplace(
std::make_pair(nameToken, Definition{argName, dir, j, tokens - j}));
} else {
- j = SkipBlanks(dir, j + 1);
definitions_.emplace(
std::make_pair(nameToken, Definition{dir, j, tokens - j}));
}
diff --git a/flang/preprocessor.h b/flang/preprocessor.h
index fd20d85..068d169 100644
--- a/flang/preprocessor.h
+++ b/flang/preprocessor.h
@@ -103,20 +103,28 @@
void AddChar(char ch) {
char_.emplace_back(ch);
}
+
void EndToken() {
// CHECK(char_.size() > nextStart_);
start_.emplace_back(nextStart_);
nextStart_ = char_.size();
}
+ void ReopenLastToken() {  // Undoes the most recent EndToken() so that later AddChar() calls extend that token.
+ nextStart_ = start_.back();  // the open token now begins where the last closed token began
+ start_.pop_back();  // requires at least one closed token; the caller visible in Definition::Apply checks !empty() first
+ }
+
void Append(const TokenSequence &);
- void Emit(CharBuffer *);
+ void EmitWithCaseConversion(CharBuffer *);
bool empty() const { return start_.empty(); }
size_t size() const { return start_.size(); }
+ // Returns a pointer to the first stored character.  vector::data() is used
+ // instead of &char_[0] because indexing an empty vector is undefined.
+ const char *data() const { return char_.data(); }
+
void clear() {
start_.clear();
nextStart_ = 0;
diff --git a/flang/prescan.cc b/flang/prescan.cc
index 16c6af4..0426ec8 100644
--- a/flang/prescan.cc
+++ b/flang/prescan.cc
@@ -12,32 +12,40 @@
CharBuffer Prescanner::Prescan(const SourceFile &source) {
lineStart_ = source.content();
limit_ = lineStart_ + source.bytes();
- CommentLinesAndPreprocessorDirectives();
CharBuffer out;
TokenSequence tokens, preprocessed;
while (lineStart_ < limit_) {
+ if (CommentLinesAndPreprocessorDirectives() &&  // now run before every source line instead of once before the loop
+ lineStart_ >= limit_) {  // lines were consumed and nothing is left to scan
+ break;
+ }
BeginSourceLineAndAdvance();
if (inFixedForm_) {
- LabelField(&out);
+ LabelField(&tokens);  // the label field is collected as tokens rather than written straight to out
} else {
SkipSpaces();
}
while (NextToken(&tokens)) {
}
if (preprocessor_.MacroReplacement(tokens, &preprocessed)) {
- // TODO: include label field
- // TODO: recheck for comments, &c.; just retokenize?
- preprocessed.Emit(&out);
+ preprocessed.AddChar('\n');  // temporarily append a newline token before the comment checks below
+ preprocessed.EndToken();
+ if (IsFixedFormCommentLine(preprocessed.data()) ||  // recheck the replaced text: it may now be a comment line
+ IsFreeFormComment(preprocessed.data())) {
+ ++newlineDebt_;  // emit none of the text. NOTE(review): out.Put('\n') below still runs for this line, so this looks like two newlines for one source line -- confirm
+ } else {
+ preprocessed.pop_back(); // clip the newline added above
+ preprocessed.EmitWithCaseConversion(&out);  // write the replaced line, lower-casing tokens that start with a letter
+ }
preprocessed.clear();
} else {
- tokens.Emit(&out);
+ tokens.EmitWithCaseConversion(&out);  // no replacement took place: write the scanned tokens themselves
}
tokens.clear();
out.Put('\n');
- for (; newlineDebt_ > 0; --newlineDebt_) {
- out.Put('\n');
- }
+ PayNewlineDebt(&out);  // emit the newlines owed so far
}
+ PayNewlineDebt(&out);  // covers debt still outstanding after the loop, e.g. when it exits through the break above
return std::move(out);
}
@@ -66,23 +74,28 @@
}
}
-void Prescanner::LabelField(CharBuffer *out) {
+void Prescanner::LabelField(TokenSequence *token) {  // Scans columns 1-6 into *token: the kept label characters as one token, then blank padding as another.
int outCol{1};
- while (*at_ != '\n' && column_ <= 6) {
+ for (; *at_ != '\n' && column_ <= 6; ++at_, ++column_) {  // advances at_ and column_ directly; NextChar() is no longer called here
if (*at_ == '\t') {
- NextChar();
+ ++at_;  // step past the tab itself
+ column_ = 7;  // a tab ends the label field: the following character is treated as column 7
break;
}
if (*at_ != ' ' &&
(*at_ != '0' || column_ != 6)) { // '0' in column 6 becomes space
- out->Put(*at_);
+ token->AddChar(*at_);  // keep this label-field character
++outCol;
}
- NextChar();
}
- while (outCol < 7) {
- out->Put(' ');
- ++outCol;
+ if (outCol > 1) {  // close the token only if at least one character was kept
+ token->EndToken();
+ }
+ if (outCol < 7) {  // fewer than six characters kept: pad with spaces up to six
+ for (; outCol < 7; ++outCol) {
+ token->AddChar(' ');
+ }
+ token->EndToken();  // the padding forms a token of its own
}
}
@@ -94,7 +107,8 @@
while (*at_ == '/' && at_[1] == '*') {
char star{' '}, slash{' '};
for (at_ += 2, column_ += 2;
- *at_ != '\n' && (star != '*' || slash != '/');
+ (*at_ != '\n' || slash == '\\') &&
+ (star != '*' || slash != '/');
++at_, ++column_) {
star = slash;
slash = *at_;
@@ -189,7 +203,7 @@
} else if (isalpha(*at_)) {
// Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
// we don't misrecognize I9HOLLERITH as an identifier in the next case.
- EmitCharAndAdvance(tokens, tolower(*at_));
+ EmitCharAndAdvance(tokens, *at_);
}
preventHollerith_ = false;
} else if (*at_ == '.') {
@@ -197,16 +211,10 @@
while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
}
ExponentAndKind(tokens);
- } else if (isalpha(*at_)) {
- while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
- }
- if (*at_ == '.') {
- EmitCharAndAdvance(tokens, '.');
- }
}
preventHollerith_ = false;
} else if (IsNameChar(*at_)) {
- while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
+ while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
}
if (*at_ == '\'' || *at_ == '"') {
QuotedCharacterLiteral(tokens);
@@ -219,7 +227,13 @@
preventHollerith_ = true; // ambiguity: CHARACTER*2H
}
} else {
- char ch{*at_}, nch{EmitCharAndAdvance(tokens, ch)};
+ char ch{*at_};
+ if (ch == '(' || ch == '[') {
+ ++delimiterNesting_;
+ } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
+ --delimiterNesting_;
+ }
+ char nch{EmitCharAndAdvance(tokens, ch)};
preventHollerith_ = false;
if ((nch == '=' && (ch == '<' || ch == '>' || ch == '/' || ch == '=')) ||
(ch == nch && (ch == '/' || ch == ':' || ch == '#')) ||
@@ -244,7 +258,7 @@
EmitCharAndAdvance(tokens, *at_);
}
if (*at_ == '_') {
- while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
+ while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
}
}
return true;
@@ -348,7 +362,23 @@
return *p == '#';
}
-void Prescanner::CommentLinesAndPreprocessorDirectives() {
+bool Prescanner::CommentLines() {  // Skips consecutive comment lines starting at lineStart_; returns true if at least one was skipped.
+ bool any{false};
+ while (lineStart_ < limit_) {  // stop at the end of the input
+ if (IsFixedFormCommentLine(lineStart_) ||
+ IsFreeFormComment(lineStart_)) {
+ NextLine();
+ ++newlineDebt_;  // each skipped line owes one newline, written later by PayNewlineDebt()
+ any = true;
+ } else {
+ break;  // first line that is not a comment: leave lineStart_ pointing at it
+ }
+ }
+ return any;
+}
+
+bool Prescanner::CommentLinesAndPreprocessorDirectives() {
+ bool any{false};
while (lineStart_ < limit_) {
if (IsFixedFormCommentLine(lineStart_) ||
IsFreeFormComment(lineStart_)) {
@@ -366,7 +396,9 @@
break;
}
++newlineDebt_;
+ any = true;
}
+ return any;
}
const char *Prescanner::FixedFormContinuationLine() {
@@ -389,11 +421,14 @@
return p + 6;
}
}
+ if (delimiterNesting_ > 0) {
+ return p;
+ }
return nullptr; // not a continuation line
}
bool Prescanner::FixedFormContinuation() {
- CommentLinesAndPreprocessorDirectives();
+ CommentLines();
const char *cont{FixedFormContinuationLine()};
if (cont == nullptr) {
return false;
@@ -417,7 +452,7 @@
if (*p != '\n' && (inCharLiteral_ || *p != '!')) {
return false;
}
- CommentLinesAndPreprocessorDirectives();
+ CommentLines();
p = lineStart_;
if (p >= limit_) {
return false;
@@ -429,7 +464,7 @@
if (*p == '&') {
++p;
++column;
- } else if (ampersand) {
+ } else if (ampersand || delimiterNesting_ > 0) {
if (p > lineStart_) {
--p;
--column;
@@ -437,9 +472,17 @@
} else {
return false; // not a continuation
}
- BeginSourceLine(p, column);
+ at_ = p;
+ column_ = column;
+ tabInCurrentLine_ = false;
++newlineDebt_;
NextLine();
return true;
}
+
+void Prescanner::PayNewlineDebt(CharBuffer *out) {  // Writes one '\n' to *out per owed newline, leaving newlineDebt_ at zero.
+ for (; newlineDebt_ > 0; --newlineDebt_) {
+ out->Put('\n');
+ }
+}
} // namespace Fortran
diff --git a/flang/prescan.h b/flang/prescan.h
index 01fee1f..20dc36e 100644
--- a/flang/prescan.h
+++ b/flang/prescan.h
@@ -51,6 +51,7 @@
column_ = column;
tabInCurrentLine_ = false;
preventHollerith_ = false;
+ delimiterNesting_ = 0;
}
void BeginSourceLineAndAdvance() {
@@ -65,20 +66,22 @@
}
void NextLine();
- void LabelField(CharBuffer *);
+ void LabelField(TokenSequence *);
void NextChar();
void SkipSpaces();
bool NextToken(TokenSequence *);
bool ExponentAndKind(TokenSequence *);
void QuotedCharacterLiteral(TokenSequence *);
bool PadOutCharacterLiteral();
- void CommentLinesAndPreprocessorDirectives();
+ bool CommentLines();
+ bool CommentLinesAndPreprocessorDirectives();
bool IsFixedFormCommentLine(const char *);
bool IsFreeFormComment(const char *);
bool IsPreprocessorDirectiveLine(const char *);
const char *FixedFormContinuationLine();
bool FixedFormContinuation();
bool FreeFormContinuation();
+ void PayNewlineDebt(CharBuffer *);
std::stringstream *error_;
const char *lineStart_{nullptr}; // next line to process; <= limit_
@@ -94,6 +97,7 @@
bool preventHollerith_{false};
bool enableOldDebugLines_{false};
bool enableBackslashEscapesInCharLiterals_{true};
+ int delimiterNesting_{0};
Preprocessor preprocessor_;
};
} // namespace Fortran