[flang] Preprocessor work.  f18 now passes all my tests except for #if (expression)
and file inclusion.

Original-commit: flang-compiler/f18@07ebac21f543a224a8f426faf206f7525899081e
diff --git a/flang/char-buffer.cc b/flang/char-buffer.cc
index 66be6eb..b678500 100644
--- a/flang/char-buffer.cc
+++ b/flang/char-buffer.cc
@@ -40,17 +40,6 @@
   Put(str.data(), str.size());
 }
 
-void CharBuffer::Put(const std::vector<char> &data) {
-  size_t n{data.size()};
-  size_t chunk;
-  for (size_t at{0}; at < n; at += chunk) {
-    char *to{FreeSpace(&chunk)};
-    chunk = std::min(n - at, chunk);
-    Claim(chunk);
-    std::memcpy(to, &data[at], chunk);
-  }
-}
-
 void CharBuffer::CopyToContiguous(char *data) {
   char *to{data};
   for (char ch : *this) {
diff --git a/flang/char-buffer.h b/flang/char-buffer.h
index 743bb24..84ee928 100644
--- a/flang/char-buffer.h
+++ b/flang/char-buffer.h
@@ -41,7 +41,6 @@
   void Claim(size_t);
   void Put(const char *data, size_t n);
   void Put(const std::string &);
-  void Put(const std::vector<char> &);
   void Put(char x) { Put(&x, 1); }
   void CopyToContiguous(char *data);
 
diff --git a/flang/preprocessor.cc b/flang/preprocessor.cc
index c2dbeaf1..c01a83b 100644
--- a/flang/preprocessor.cc
+++ b/flang/preprocessor.cc
@@ -2,10 +2,12 @@
 #include "char-buffer.h"
 #include "idioms.h"
 #include "prescan.h"
+#include <cctype>
 #include <map>
 #include <memory>
 #include <set>
 #include <utility>
+#include <iostream>  // TODO pmk rm
 
 namespace Fortran {
 
@@ -21,8 +23,21 @@
   nextStart_ = char_.size();
 }
 
-void TokenSequence::Emit(CharBuffer *out) {
-  out->Put(char_);
+// Emits all tokens to *out, lower-casing those that begin with a letter.
+void TokenSequence::EmitWithCaseConversion(CharBuffer *out) {
+  size_t tokens{start_.size()}, chars{char_.size()}, atToken{0};
+  for (size_t j{0}; j < chars; ) {
+    size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
+    // <cctype> functions are undefined for negative char values; cast first.
+    if (isalpha(static_cast<unsigned char>(char_[j]))) {
+      for (; j < nextStart; ++j) {
+        out->Put(tolower(static_cast<unsigned char>(char_[j])));
+      }
+    } else {
+      out->Put(&char_[j], nextStart - j);
+      j = nextStart;
+    }
+  }
+}
 
 Definition::Definition(const TokenSequence &repl, size_t firstToken,
@@ -51,36 +66,18 @@
     args[arg] = "~"s + argIndex++;
   }
   TokenSequence result;
-  bool pasting{false};
   for (size_t j{0}; j < tokens; ++j) {
     size_t bytes{token.GetBytes(firstToken + j)};
     if (bytes == 0) {
       continue;
     }
     const char *text{token.GetText(firstToken + j)};
-    if (bytes == 2 && text[0] == '#' && text[1] == '#') {
-      for (size_t rtc{result.size()};
-           rtc > 0 && (result.GetBytes(rtc-1) == 0 ||
-                       *result.GetText(rtc-1) == ' ');
-           --rtc) {
-        result.pop_back();
-      }
-      pasting = true;
-      continue;
-    }
-    if (*text == ' ') {
-      if (pasting) {
+    if (bytes > 0 && (*text == '_' || isalpha(*text))) {
+      auto it = args.find(token.GetString(firstToken + j));
+      if (it != args.end()) {
+        result.push_back(it->second);
         continue;
       }
-    } else {
-      pasting = false;
-      if (bytes > 0 && (*text == '_' || isalpha(*text))) {
-        auto it = args.find(token.GetString(firstToken + j));
-        if (it != args.end()) {
-          result.push_back(it->second);
-          continue;
-        }
-      }
     }
     result.push_back(text, bytes);
   }
@@ -89,7 +86,7 @@
 
 TokenSequence Definition::Apply(const std::vector<TokenSequence> &args) {
   TokenSequence result;
-  bool stringify{false};
+  bool stringify{false}, pasting{false};
   size_t tokens{replacement_.size()};
   for (size_t j{0}; j < tokens; ++j) {
     size_t bytes{replacement_.GetBytes(j)};
@@ -118,12 +115,34 @@
         result.push_back(strung);
       } else {
         for (size_t k{0}; k < argTokens; ++k) {
-          result.push_back(args[index].GetText(k), args[index].GetBytes(k));
+          const char *text{args[index].GetText(k)};
+          size_t bytes{args[index].GetBytes(k)};
+          if (pasting && (bytes == 0 || *text == ' ' || *text == '\t')) {
+          } else {
+            result.push_back(text, bytes);
+            pasting = false;
+          }
         }
       }
+    } else if (bytes == 2 && text[0] == '#' && text[1] == '#') {
+      // Token pasting operator in body (not expanded argument); discard any
+      // immediately preceding white space, then reopen the last token.
+      while (!result.empty() &&
+             (result.GetBytes(result.size() - 1) == 0 ||
+              *result.GetText(result.size() - 1) == ' ' ||
+              *result.GetText(result.size() - 1) == '\t')) {
+        result.pop_back();
+      }
+      if (!result.empty()) {
+        result.ReopenLastToken();
+        pasting = true;
+      }
+    } else if (pasting && (bytes == 0 || *text == ' ' || *text == '\t')) {
+      // Delete whitespace immediately following ## in the body.
     } else {
       stringify = bytes == 1 && *text == '#';
       result.push_back(text, bytes);
+      pasting = false;
     }
   }
   return result;
@@ -225,7 +244,7 @@
         actual.push_back(input.GetText(at), input.GetBytes(at));
       }
       TokenSequence arg;
-      if (!MacroReplacement(actual, &arg)) {
+      if (true /*pmk?*/ || !MacroReplacement(actual, &arg)) {
         args.emplace_back(std::move(actual));
       } else {
         args.emplace_back(std::move(arg));
@@ -279,6 +298,9 @@
     return ""s;  // TODO: treat as #line
   }
   std::string dirName{dir.GetString(j)};
+  for (char &ch : dirName) {
+    ch = tolower(ch);
+  }
   j = SkipBlanks(dir, j + 1);
   std::string nameString;
   CharPointerWithLength nameToken;
@@ -331,7 +353,6 @@
       definitions_.emplace(
         std::make_pair(nameToken, Definition{argName, dir, j, tokens - j}));
     } else {
-      j = SkipBlanks(dir, j + 1);
       definitions_.emplace(
         std::make_pair(nameToken, Definition{dir, j, tokens - j}));
     }
diff --git a/flang/preprocessor.h b/flang/preprocessor.h
index fd20d85..068d169 100644
--- a/flang/preprocessor.h
+++ b/flang/preprocessor.h
@@ -103,20 +103,28 @@
   void AddChar(char ch) {
     char_.emplace_back(ch);
   }
+
   void EndToken() {
     // CHECK(char_.size() > nextStart_);
     start_.emplace_back(nextStart_);
     nextStart_ = char_.size();
   }
 
+  void ReopenLastToken() {  // undo the last EndToken(); requires !empty()
+    nextStart_ = start_.back();  // resume the token where it began
+    start_.pop_back();
+  }
+
   void Append(const TokenSequence &);
 
-  void Emit(CharBuffer *);
+  void EmitWithCaseConversion(CharBuffer *);
 
   bool empty() const { return start_.empty(); }
 
   size_t size() const { return start_.size(); }
 
+  const char *data() const { return char_.data(); }  // valid even if empty
+
   void clear() {
     start_.clear();
     nextStart_ = 0;
diff --git a/flang/prescan.cc b/flang/prescan.cc
index 16c6af4..0426ec8 100644
--- a/flang/prescan.cc
+++ b/flang/prescan.cc
@@ -12,32 +12,40 @@
 CharBuffer Prescanner::Prescan(const SourceFile &source) {
   lineStart_ = source.content();
   limit_ = lineStart_ + source.bytes();
-  CommentLinesAndPreprocessorDirectives();
   CharBuffer out;
   TokenSequence tokens, preprocessed;
   while (lineStart_ < limit_) {
+    if (CommentLinesAndPreprocessorDirectives() &&
+        lineStart_ >= limit_) {
+      break;  // lines were consumed and the end of the input was reached
+    }
     BeginSourceLineAndAdvance();
     if (inFixedForm_) {
-      LabelField(&out);
+      LabelField(&tokens);  // the label now goes through macro replacement
     } else {
       SkipSpaces();
     }
     while (NextToken(&tokens)) {
     }
     if (preprocessor_.MacroReplacement(tokens, &preprocessed)) {
-      // TODO: include label field
-      // TODO: recheck for comments, &c.; just retokenize?
-      preprocessed.Emit(&out);
+      preprocessed.AddChar('\n');  // temporary newline for the tests below
+      preprocessed.EndToken();
+      if (IsFixedFormCommentLine(preprocessed.data()) ||
+          IsFreeFormComment(preprocessed.data())) {
+        ++newlineDebt_;  // NOTE(review): '\n' is also Put below; double count?
+      } else {
+        preprocessed.pop_back();  // clip the newline added above
+        preprocessed.EmitWithCaseConversion(&out);
+      }
       preprocessed.clear();
     } else {
-      tokens.Emit(&out);
+      tokens.EmitWithCaseConversion(&out);
     }
     tokens.clear();
     out.Put('\n');
-    for (; newlineDebt_ > 0; --newlineDebt_) {
-      out.Put('\n');
-    }
+    PayNewlineDebt(&out);
   }
+  PayNewlineDebt(&out);  // debt accrued by the check that broke out of the loop
   return std::move(out);
 }
 
@@ -66,23 +74,28 @@
   }
 }
 
-void Prescanner::LabelField(CharBuffer *out) {
+void Prescanner::LabelField(TokenSequence *token) {
   int outCol{1};
-  while (*at_ != '\n' && column_ <= 6) {
+  for (; *at_ != '\n' && column_ <= 6; ++at_, ++column_) {
     if (*at_ == '\t') {
-      NextChar();
+      ++at_;  // step over the tab itself
+      column_ = 7;  // a tab ends the label field
       break;
     }
     if (*at_ != ' ' &&
         (*at_ != '0' || column_ != 6)) {  // '0' in column 6 becomes space
-      out->Put(*at_);
+      token->AddChar(*at_);
       ++outCol;
     }
-    NextChar();
   }
-  while (outCol < 7) {
-    out->Put(' ');
-    ++outCol;
+  if (outCol > 1) {  // label characters were added; close them as one token
+    token->EndToken();
+  }
+  if (outCol < 7) {  // pad out to column 7 with a separate token of blanks
+    for (; outCol < 7; ++outCol) {
+      token->AddChar(' ');
+    }
+    token->EndToken();
   }
 }
 }
 
@@ -94,7 +107,8 @@
     while (*at_ == '/' && at_[1] == '*') {
       char star{' '}, slash{' '};
       for (at_ += 2, column_ += 2;
-           *at_ != '\n' && (star != '*' || slash != '/');
+           (*at_ != '\n' || slash == '\\') &&
+           (star != '*' || slash != '/');
            ++at_, ++column_) {
         star = slash;
         slash = *at_;
@@ -189,7 +203,7 @@
     } else if (isalpha(*at_)) {
       // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
       // we don't misrecognize I9HOLLERITH as an identifier in the next case.
-      EmitCharAndAdvance(tokens, tolower(*at_));
+      EmitCharAndAdvance(tokens, *at_);
     }
     preventHollerith_ = false;
   } else if (*at_ == '.') {
@@ -197,16 +211,10 @@
       while (isdigit(EmitCharAndAdvance(tokens, *at_))) {
       }
       ExponentAndKind(tokens);
-    } else if (isalpha(*at_)) {
-      while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
-      }
-      if (*at_ == '.') {
-        EmitCharAndAdvance(tokens, '.');
-      }
     }
     preventHollerith_ = false;
   } else if (IsNameChar(*at_)) {
-    while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
+    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
     }
     if (*at_ == '\'' || *at_ == '"') {
       QuotedCharacterLiteral(tokens);
@@ -219,7 +227,13 @@
       preventHollerith_ = true;  // ambiguity: CHARACTER*2H
     }
   } else {
-    char ch{*at_}, nch{EmitCharAndAdvance(tokens, ch)};
+    char ch{*at_};
+    if (ch == '(' || ch == '[') {
+      ++delimiterNesting_;
+    } else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
+      --delimiterNesting_;
+    }
+    char nch{EmitCharAndAdvance(tokens, ch)};
     preventHollerith_ = false;
     if ((nch == '=' && (ch == '<' || ch == '>' || ch == '/' || ch == '=')) ||
         (ch == nch && (ch == '/' || ch == ':' || ch == '#')) ||
@@ -244,7 +258,7 @@
     EmitCharAndAdvance(tokens, *at_);
   }
   if (*at_ == '_') {
-    while (IsNameChar(EmitCharAndAdvance(tokens, tolower(*at_)))) {
+    while (IsNameChar(EmitCharAndAdvance(tokens, *at_))) {
     }
   }
   return true;
@@ -348,7 +362,23 @@
   return *p == '#';
 }
 
-void Prescanner::CommentLinesAndPreprocessorDirectives() {
+// While the line at lineStart_ is a fixed- or free-form comment, calls
+// NextLine() and adds one to newlineDebt_; returns whether any line matched.
+bool Prescanner::CommentLines() {
+  bool skipped{false};
+  for (; lineStart_ < limit_; skipped = true) {
+    if (!IsFixedFormCommentLine(lineStart_) &&
+        !IsFreeFormComment(lineStart_)) {
+      break;
+    }
+    NextLine();
+    ++newlineDebt_;
+  }
+  return skipped;
+}
+
+bool Prescanner::CommentLinesAndPreprocessorDirectives() {
+  bool any{false};
   while (lineStart_ < limit_) {
     if (IsFixedFormCommentLine(lineStart_) ||
         IsFreeFormComment(lineStart_)) {
@@ -366,7 +396,9 @@
       break;
     }
     ++newlineDebt_;
+    any = true;
   }
+  return any;
 }
 
 const char *Prescanner::FixedFormContinuationLine() {
@@ -389,11 +421,14 @@
       return p + 6;
     }
   }
+  if (delimiterNesting_ > 0) {
+    return p;
+  }
   return nullptr;  // not a continuation line
 }
 
 bool Prescanner::FixedFormContinuation() {
-  CommentLinesAndPreprocessorDirectives();
+  CommentLines();
   const char *cont{FixedFormContinuationLine()};
   if (cont == nullptr) {
     return false;
@@ -417,7 +452,7 @@
   if (*p != '\n' && (inCharLiteral_ || *p != '!')) {
     return false;
   }
-  CommentLinesAndPreprocessorDirectives();
+  CommentLines();
   p = lineStart_;
   if (p >= limit_) {
     return false;
@@ -429,7 +464,7 @@
   if (*p == '&') {
     ++p;
     ++column;
-  } else if (ampersand) {
+  } else if (ampersand || delimiterNesting_ > 0) {
     if (p > lineStart_) {
       --p;
       --column;
@@ -437,9 +472,17 @@
   } else {
     return false;  // not a continuation
   }
-  BeginSourceLine(p, column);
+  at_ = p;
+  column_ = column;
+  tabInCurrentLine_ = false;
   ++newlineDebt_;
   NextLine();
   return true;
 }
+
+void Prescanner::PayNewlineDebt(CharBuffer *out) {  // drains newlineDebt_
+  for (; newlineDebt_ > 0; --newlineDebt_) {
+    out->Put('\n');  // one newline per unit of debt
+  }
+}
 }  // namespace Fortran
diff --git a/flang/prescan.h b/flang/prescan.h
index 01fee1f..20dc36e 100644
--- a/flang/prescan.h
+++ b/flang/prescan.h
@@ -51,6 +51,7 @@
     column_ = column;
     tabInCurrentLine_ = false;
     preventHollerith_ = false;
+    delimiterNesting_ = 0;
   }
 
   void BeginSourceLineAndAdvance() {
@@ -65,20 +66,22 @@
   }
 
   void NextLine();
-  void LabelField(CharBuffer *);
+  void LabelField(TokenSequence *);
   void NextChar();
   void SkipSpaces();
   bool NextToken(TokenSequence *);
   bool ExponentAndKind(TokenSequence *);
   void QuotedCharacterLiteral(TokenSequence *);
   bool PadOutCharacterLiteral();
-  void CommentLinesAndPreprocessorDirectives();
+  bool CommentLines();
+  bool CommentLinesAndPreprocessorDirectives();
   bool IsFixedFormCommentLine(const char *);
   bool IsFreeFormComment(const char *);
   bool IsPreprocessorDirectiveLine(const char *);
   const char *FixedFormContinuationLine();
   bool FixedFormContinuation();
   bool FreeFormContinuation();
+  void PayNewlineDebt(CharBuffer *);
 
   std::stringstream *error_;
   const char *lineStart_{nullptr};  // next line to process; <= limit_
@@ -94,6 +97,7 @@
   bool preventHollerith_{false};
   bool enableOldDebugLines_{false};
   bool enableBackslashEscapesInCharLiterals_{true};
+  int delimiterNesting_{0};
   Preprocessor preprocessor_;
 };
 }  // namespace Fortran