[flang] Handle empty files gracefully.
Create interval.h. Use std::size_t instead of bare size_t. Redefine parser::Name to not be just a bare string.
Break out and rename CharBlock from token-sequence.h for use in the parse tree.
Incremental replacement of name strings with pointers to cooked characters.
Fix case sensitivity problem.
Use new CharBlock encoding to replace strings for real literal constants.
Normalize cooked character stream to lower case.
Simplify parsing now that cooked stream is lower case. Replace Keyword in parse tree.
Add static_asserts to || and recovery parsers to enforce same result types.
Remove needless TODO comment inserted earlier.
Fix case conversion on prefixed character literals (f90_correct/dc04.f90).
Use CharBlock in user-state.h.
Complete transition from nextChar to nextCh (i.e., always use pointers).
Document extensions. Begin work on compiler directive lines.
More documentation work.
Reformat prescan.cc.
More work on compiler directive scanning.
Original-commit: flang-compiler/f18@38d0404e16ee7c797c69bc38fa64bcb55413884a
Reviewed-on: https://github.com/flang-compiler/f18/pull/29
Tree-same-pre-rewrite: false
diff --git a/flang/lib/parser/basic-parsers.h b/flang/lib/parser/basic-parsers.h
index 2804196..72f41b0 100644
--- a/flang/lib/parser/basic-parsers.h
+++ b/flang/lib/parser/basic-parsers.h
@@ -14,6 +14,7 @@
// This header defines the fundamental parser template classes and helper
// template functions. See parser-combinators.txt for documentation.
+#include "char-block.h"
#include "idioms.h"
#include "message.h"
#include "parse-state.h"
@@ -81,7 +82,7 @@
MessageContext context{state->context()};
ParseState backtrack{*state};
std::optional<resultType> result{parser_.Parse(state)};
- if (result) {
+ if (result.has_value()) {
// preserve any new messages
messages.Annex(state->messages());
state->messages()->swap(messages);
@@ -233,6 +234,7 @@
template<typename PA, typename PB> class AlternativeParser {
public:
using resultType = typename PA::resultType;
+ static_assert(std::is_same_v<resultType, typename PB::resultType>);
constexpr AlternativeParser(const AlternativeParser &) = default;
constexpr AlternativeParser(const PA &pa, const PB &pb) : pa_{pa}, pb_{pb} {}
std::optional<resultType> Parse(ParseState *state) const {
@@ -306,6 +308,7 @@
template<typename PA, typename PB> class RecoveryParser {
public:
using resultType = typename PA::resultType;
+ static_assert(std::is_same_v<resultType, typename PB::resultType>);
constexpr RecoveryParser(const RecoveryParser &) = default;
constexpr RecoveryParser(const PA &pa, const PB &pb) : pa_{pa}, pb_{pb} {}
std::optional<resultType> Parse(ParseState *state) const {
@@ -1197,20 +1200,22 @@
inline constexpr auto guard(bool truth) { return GuardParser(truth); }
-// nextChar is a parser that succeeds if the parsing state is not
-// at the end of its input, returning the next character and
+// nextCh is a parser that succeeds if the parsing state is not
+// at the end of its input, returning the next character location and
// advancing the parse when it does so.
-constexpr struct NextCharParser {
- using resultType = char;
- constexpr NextCharParser() {}
- std::optional<char> Parse(ParseState *state) const {
- std::optional<char> ch{state->GetNextChar()};
- if (!ch) {
+constexpr struct NextCh {
+ using resultType = const char *;
+ constexpr NextCh() {}
+ std::optional<const char *> Parse(ParseState *state) const {
+ if (state->IsAtEnd()) {
state->PutMessage("end of file"_en_US);
+ return {};
}
- return ch;
+ const char *at{state->GetLocation()};
+ state->UncheckedAdvance();
+ return {at};
}
-} nextChar;
+} nextCh;
// If a is a parser for nonstandard usage, extension(a) is a parser that
// is disabled in strict conformance mode and otherwise sets a violation flag
@@ -1226,7 +1231,7 @@
}
auto at = state->GetLocation();
auto result = parser_.Parse(state);
- if (result) {
+ if (result.has_value()) {
state->set_anyConformanceViolation();
if (state->warnOnNonstandardUsage()) {
state->PutMessage(at, "nonstandard usage"_en_US);
@@ -1274,6 +1279,29 @@
return DeprecatedParser<PA>(parser);
}
+// SourcedParser wraps a parser whose result type has a "source" member.
+// When the wrapped parser succeeds, that member is assigned the CharBlock
+// bounded by the parse state's location before and after the parse.
+template<typename PA> class SourcedParser {
+public:
+  using resultType = typename PA::resultType;
+  constexpr SourcedParser(const SourcedParser &) = default;
+  constexpr SourcedParser(const PA &parser) : parser_{parser} {}
+  std::optional<resultType> Parse(ParseState *state) const {
+    const char *begin{state->GetLocation()};
+    auto parsed = parser_.Parse(state);
+    if (!parsed.has_value()) {
+      // Failure: pass the empty result through untouched.
+      return parsed;
+    }
+    parsed->source = CharBlock{begin, state->GetLocation()};
+    return parsed;
+  }
+
+private:
+  const PA parser_;
+};
+
+// sourced(p) wraps the parser p in a SourcedParser.
+template<typename PA> inline constexpr auto sourced(const PA &parser) {
+  return SourcedParser<PA>(parser);
+}
+
constexpr struct GetUserState {
using resultType = UserState *;
constexpr GetUserState() {}
@@ -1300,15 +1328,6 @@
return {state->column()};
}
} getColumn;
-
-constexpr struct GetProvenance {
- using resultType = Provenance;
- constexpr GetProvenance() {}
- static std::optional<Provenance> Parse(ParseState *state) {
- return {state->GetProvenance()};
- }
-} getProvenance;
-
} // namespace parser
} // namespace Fortran
#endif // FORTRAN_PARSER_BASIC_PARSERS_H_
diff --git a/flang/lib/parser/char-block.h b/flang/lib/parser/char-block.h
new file mode 100644
index 0000000..85db0f9
--- /dev/null
+++ b/flang/lib/parser/char-block.h
@@ -0,0 +1,80 @@
+#ifndef FORTRAN_PARSER_CHAR_BLOCK_H_
+#define FORTRAN_PARSER_CHAR_BLOCK_H_
+
+// Describes a contiguous block of characters; does not own their storage.
+
+#include "interval.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstring>
+#include <string>
+#include <utility>
+
+namespace Fortran {
+namespace parser {
+
+// CharBlock describes a contiguous run of characters by its first address
+// and its length.  It does not own the characters' storage.
+class CharBlock {
+public:
+  CharBlock() {}
+  CharBlock(const char *x, std::size_t n = 1) : interval_{x, n} {}
+  CharBlock(const char *b, const char *e)
+    : interval_{b, static_cast<std::size_t>(e - b)} {}
+  CharBlock(const std::string &s) : interval_{s.data(), s.size()} {}
+  CharBlock(const CharBlock &) = default;
+  CharBlock(CharBlock &&) = default;
+  CharBlock &operator=(const CharBlock &) = default;
+  CharBlock &operator=(CharBlock &&) = default;
+
+  bool empty() const { return interval_.empty(); }
+  std::size_t size() const { return interval_.size(); }
+  const char *begin() const { return interval_.start(); }
+  const char *end() const { return interval_.start() + interval_.size(); }
+  const char &operator[](std::size_t j) const { return interval_.start()[j]; }
+
+  // True when every character is a space or a tab (so also when empty).
+  bool IsBlank() const {
+    for (char ch : *this) {
+      if (ch != ' ' && ch != '\t') {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Copies the characters into an owning std::string.
+  std::string ToString() const {
+    return std::string{interval_.start(), interval_.size()};
+  }
+
+  // The relational operators order blocks by content (see Compare).
+  bool operator<(const CharBlock &that) const { return Compare(that) < 0; }
+  bool operator<=(const CharBlock &that) const { return Compare(that) <= 0; }
+  bool operator==(const CharBlock &that) const { return Compare(that) == 0; }
+  bool operator!=(const CharBlock &that) const { return Compare(that) != 0; }
+  bool operator>=(const CharBlock &that) const { return Compare(that) >= 0; }
+  bool operator>(const CharBlock &that) const { return Compare(that) > 0; }
+
+private:
+  // Three-way comparison: bytewise over the common prefix, then the shorter
+  // block orders first.  Returns <0, 0, or >0.
+  int Compare(const CharBlock &that) const {
+    std::size_t bytes{std::min(size(), that.size())};
+    // A default-constructed CharBlock begins at nullptr, and std::memcmp
+    // must not be handed a null pointer even with a zero count, so the call
+    // is made only when there is something to compare.
+    if (bytes > 0) {
+      int cmp{std::memcmp(begin(), that.begin(), bytes)};
+      if (cmp != 0) {
+        return cmp;
+      }
+    }
+    return size() < that.size() ? -1 : size() > that.size();
+  }
+
+  Interval<const char *> interval_{nullptr, 0};
+};
+} // namespace parser
+} // namespace Fortran
+
+// Specialization of std::hash so that CharBlock can be used as the key of
+// std::unordered_map<CharBlock, ...> &c.
+template<> struct std::hash<Fortran::parser::CharBlock> {
+  std::size_t operator()(const Fortran::parser::CharBlock &x) const {
+    // Fold each character in order: multiply the running value by 31, then
+    // XOR in the character.
+    std::size_t result{0};
+    for (char ch : x) {
+      result = (result * 31) ^ ch;
+    }
+    return result;
+  }
+};
+#endif // FORTRAN_PARSER_CHAR_BLOCK_H_
diff --git a/flang/lib/parser/char-buffer.cc b/flang/lib/parser/char-buffer.cc
index 185bcd9..2a3c017 100644
--- a/flang/lib/parser/char-buffer.cc
+++ b/flang/lib/parser/char-buffer.cc
@@ -1,12 +1,13 @@
#include "char-buffer.h"
#include "idioms.h"
#include <algorithm>
+#include <cstddef>
#include <cstring>
namespace Fortran {
namespace parser {
-char *CharBuffer::FreeSpace(size_t *n) {
+char *CharBuffer::FreeSpace(std::size_t *n) {
int offset{LastBlockOffset()};
if (blocks_.empty()) {
blocks_.emplace_front();
@@ -20,16 +21,16 @@
return last_->data + offset;
}
-void CharBuffer::Claim(size_t n) {
+void CharBuffer::Claim(std::size_t n) {
if (n > 0) {
bytes_ += n;
lastBlockEmpty_ = false;
}
}
-void CharBuffer::Put(const char *data, size_t n) {
- size_t chunk;
- for (size_t at{0}; at < n; at += chunk) {
+void CharBuffer::Put(const char *data, std::size_t n) {
+ std::size_t chunk;
+ for (std::size_t at{0}; at < n; at += chunk) {
char *to{FreeSpace(&chunk)};
chunk = std::min(n - at, chunk);
Claim(chunk);
diff --git a/flang/lib/parser/char-buffer.h b/flang/lib/parser/char-buffer.h
index 7ea6419c..1347f82 100644
--- a/flang/lib/parser/char-buffer.h
+++ b/flang/lib/parser/char-buffer.h
@@ -4,6 +4,7 @@
// Defines a simple expandable buffer suitable for efficiently accumulating
// a stream of bytes.
+#include <cstddef>
#include <forward_list>
#include <string>
#include <utility>
@@ -29,7 +30,7 @@
return *this;
}
- size_t size() const { return bytes_; }
+ std::size_t size() const { return bytes_; }
void clear() {
blocks_.clear();
@@ -38,15 +39,15 @@
lastBlockEmpty_ = false;
}
- char *FreeSpace(size_t *);
- void Claim(size_t);
- void Put(const char *data, size_t n);
+ char *FreeSpace(std::size_t *);
+ void Claim(std::size_t);
+ void Put(const char *data, std::size_t n);
void Put(const std::string &);
void Put(char x) { Put(&x, 1); }
private:
struct Block {
- static constexpr size_t capacity{1 << 20};
+ static constexpr std::size_t capacity{1 << 20};
char data[capacity];
};
@@ -76,7 +77,7 @@
++*this;
return result;
}
- iterator &operator+=(size_t n) {
+ iterator &operator+=(std::size_t n) {
while (n >= Block::capacity - offset_) {
n -= Block::capacity - offset_;
offset_ = 0;
@@ -110,7 +111,7 @@
int LastBlockOffset() const { return bytes_ % Block::capacity; }
std::forward_list<Block> blocks_;
std::forward_list<Block>::iterator last_{blocks_.end()};
- size_t bytes_{0};
+ std::size_t bytes_{0};
bool lastBlockEmpty_{false};
};
} // namespace parser
diff --git a/flang/lib/parser/characters.cc b/flang/lib/parser/characters.cc
index 8ead0b7..7953fb9 100644
--- a/flang/lib/parser/characters.cc
+++ b/flang/lib/parser/characters.cc
@@ -1,4 +1,6 @@
#include "characters.h"
+#include <cstddef>
+#include <optional>
namespace Fortran {
namespace parser {
@@ -52,9 +54,9 @@
return {};
}
-std::optional<size_t> CountCharacters(
- const char *p, size_t bytes, std::optional<int> (*cbf)(const char *)) {
- size_t chars{0};
+std::optional<std::size_t> CountCharacters(
+ const char *p, std::size_t bytes, std::optional<int> (*cbf)(const char *)) {
+ std::size_t chars{0};
const char *limit{p + bytes};
while (p < limit) {
++chars;
diff --git a/flang/lib/parser/characters.h b/flang/lib/parser/characters.h
index 4e1e1d0..d0a837e 100644
--- a/flang/lib/parser/characters.h
+++ b/flang/lib/parser/characters.h
@@ -5,6 +5,7 @@
// conversions here to avoid dependences upon <cctype> and
// also to accomodate Fortran tokenization.
+#include <cstddef>
#include <optional>
#include <string>
@@ -160,8 +161,8 @@
std::optional<int> UTF8CharacterBytes(const char *);
std::optional<int> EUC_JPCharacterBytes(const char *);
-std::optional<size_t> CountCharacters(
- const char *, size_t bytes, std::optional<int> (*)(const char *));
+std::optional<std::size_t> CountCharacters(
+ const char *, std::size_t bytes, std::optional<int> (*)(const char *));
} // namespace parser
} // namespace Fortran
#endif // FORTRAN_PARSER_CHARACTERS_H_
diff --git a/flang/lib/parser/debug-parser.h b/flang/lib/parser/debug-parser.h
index 664d029..f474a8c 100644
--- a/flang/lib/parser/debug-parser.h
+++ b/flang/lib/parser/debug-parser.h
@@ -7,6 +7,7 @@
#include "basic-parsers.h"
#include "parse-state.h"
+#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
@@ -18,7 +19,8 @@
public:
using resultType = Success;
constexpr DebugParser(const DebugParser &) = default;
- constexpr DebugParser(const char *str, size_t n) : str_{str}, length_{n} {}
+ constexpr DebugParser(const char *str, std::size_t n)
+ : str_{str}, length_{n} {}
std::optional<Success> Parse(ParseState *state) const {
if (auto context = state->context()) {
context->Emit(std::cout, *state->cooked().allSources());
@@ -31,10 +33,10 @@
private:
const char *const str_;
- size_t length_;
+ std::size_t length_;
};
-constexpr DebugParser operator""_debug(const char str[], size_t n) {
+constexpr DebugParser operator""_debug(const char str[], std::size_t n) {
return DebugParser{str, n};
}
} // namespace parser
diff --git a/flang/lib/parser/grammar.h b/flang/lib/parser/grammar.h
index ffcaa84..f32155d 100644
--- a/flang/lib/parser/grammar.h
+++ b/flang/lib/parser/grammar.h
@@ -78,7 +78,6 @@
constexpr Parser<IntLiteralConstant> intLiteralConstant; // R708
constexpr Parser<KindParam> kindParam; // R709
constexpr Parser<RealLiteralConstant> realLiteralConstant; // R714
-constexpr Parser<ExponentPart> exponentPart; // R717
constexpr Parser<CharLength> charLength; // R723
constexpr Parser<CharLiteralConstant> charLiteralConstant; // R724
constexpr Parser<Initialization> initialization; // R743 & R805
@@ -179,15 +178,15 @@
using statementConstructor = construct<Statement<typename PA::resultType>>;
template<typename PA> inline constexpr auto unterminatedStatement(const PA &p) {
- return skipMany("\n"_tok) >> statementConstructor<PA>{}(getProvenance,
- maybe(label), isLabelOk, spaces >> p);
+ return skipMany("\n"_tok) >>
+ sourced(statementConstructor<PA>{}(maybe(label), isLabelOk, spaces >> p));
}
-constexpr auto endOfLine = CharMatch<'\n'>{} / skipMany("\n"_tok) ||
- fail<char>("expected end of line"_en_US);
+constexpr auto endOfLine = "\n"_ch / skipMany("\n"_tok) ||
+ fail<const char *>("expected end of line"_en_US);
constexpr auto endOfStmt = spaces >>
- (CharMatch<';'>{} / skipMany(";"_tok) / maybe(endOfLine) || endOfLine);
+ (";"_ch / skipMany(";"_tok) / maybe(endOfLine) || endOfLine);
template<typename PA> inline constexpr auto statement(const PA &p) {
return unterminatedStatement(p) / endOfStmt;
@@ -273,9 +272,6 @@
construct<OtherSpecificationStmt>{}(indirect(Parser<EquivalenceStmt>{})) ||
construct<OtherSpecificationStmt>{}(indirect(Parser<BasedPointerStmt>{})))
-// R516 keyword -> name
-constexpr auto keyword = name;
-
// R604 constant -> literal-constant | named-constant
// Used only via R607 int-constant and R845 data-stmt-constant.
TYPE_PARSER(construct<ConstantValue>{}(literalConstant) ||
@@ -298,8 +294,11 @@
".OR." >> pure(DefinedOperator::IntrinsicOperator::OR) ||
".EQV." >> pure(DefinedOperator::IntrinsicOperator::EQV) ||
".NEQV." >> pure(DefinedOperator::IntrinsicOperator::NEQV) ||
- extension(".XOR." >> pure(DefinedOperator::IntrinsicOperator::XOR));
-// TODO: .N./.A./.O./.X. abbreviations?
+ extension(".XOR." >> pure(DefinedOperator::IntrinsicOperator::XOR) ||
+ ".N." >> pure(DefinedOperator::IntrinsicOperator::NOT) ||
+ ".A." >> pure(DefinedOperator::IntrinsicOperator::AND) ||
+ ".O." >> pure(DefinedOperator::IntrinsicOperator::OR) ||
+ ".X." >> pure(DefinedOperator::IntrinsicOperator::XOR));
constexpr auto intrinsicOperator = "**" >>
pure(DefinedOperator::IntrinsicOperator::Power) ||
@@ -588,33 +587,21 @@
inContext("execution part"_en_US, many(executionPartConstruct));
// R602 underscore -> _
-constexpr CharMatch<'_'> underscore;
+constexpr auto underscore = "_"_ch;
+// R516 keyword -> name
// R601 alphanumeric-character -> letter | digit | underscore
+// R603 name -> letter [alphanumeric-character]...
// N.B. Don't accept an underscore if it is immediately followed by a
// quotation mark, so that kindParameter_"character literal" is parsed properly.
-constexpr auto otherIdCharacter =
- underscore / !(CharMatch<'\''>{} || CharMatch<'"'>{}) ||
- extension(
- CharMatch<'$'>{} || // PGI/ifort (and Cray/gfortran, but not first)
- CharMatch<'@'>{}); // Cray
-
-constexpr auto nonDigitIdCharacter = letter || otherIdCharacter;
-
-// R603 name -> letter [alphanumeric-character]...
-static inline Name listToString(std::list<char> &&chlist) {
- Name result;
- for (auto ch : chlist) {
- result += ToLowerCaseLetter(ch);
- }
- return result;
-}
-
-constexpr auto rawName = applyFunction(listToString,
- applyFunction(prepend<char>, nonDigitIdCharacter,
- many(nonDigitIdCharacter || digit)));
-
-TYPE_PARSER(spaces >> rawName)
+// PGI and ifort accept '$' in identifiers, even as the initial character.
+// Cray and gfortran accept '$', but not as the first character.
+// Cray accepts '@' as well.
+constexpr auto otherIdChar = underscore / !"'\""_ch || extension("$@"_ch);
+constexpr auto nonDigitIdChar = letter || otherIdChar;
+constexpr auto rawName = nonDigitIdChar >> many(nonDigitIdChar || digit);
+TYPE_PARSER(spaces >> sourced(attempt(rawName) >> construct<Name>{}))
+constexpr auto keyword = construct<Keyword>{}(name);
// R605 literal-constant ->
// int-literal-constant | real-literal-constant |
@@ -716,7 +703,8 @@
extension(construct<KindSelector>{}(
construct<KindSelector::StarSize>{}("*" >> digitString))))
-// R707 signed-int-literal-constant -> [sign] int-literal-constant
+// R710 signed-digit-string -> [sign] digit-string
+// N.B. Not a complete token -- no spaces are skipped.
static inline std::int64_t negate(std::uint64_t &&n) {
return -n; // TODO: check for overflow
}
@@ -725,11 +713,13 @@
return n; // TODO: check for overflow
}
-TYPE_PARSER(spaces >>
- construct<SignedIntLiteralConstant>{}(
- CharMatch<'-'>{} >> applyFunction(negate, digitString) ||
- maybe(CharMatch<'+'>{}) >> applyFunction(castToSigned, digitString),
- maybe(underscore >> kindParam)))
+constexpr auto signedDigitString = "-"_ch >>
+ applyFunction(negate, digitString) ||
+ maybe("+"_ch) >> applyFunction(castToSigned, digitString);
+
+// R707 signed-int-literal-constant -> [sign] int-literal-constant
+TYPE_PARSER(spaces >> sourced(construct<SignedIntLiteralConstant>{}(
+ signedDigitString, maybe(underscore >> kindParam))))
// R708 int-literal-constant -> digit-string [_ kind-param]
TYPE_PARSER(construct<IntLiteralConstant>{}(
@@ -739,16 +729,10 @@
TYPE_PARSER(construct<KindParam>{}(digitString) ||
construct<KindParam>{}(scalar(integer(constant(name)))))
-// R710 signed-digit-string -> [sign] digit-string
-// N.B. Not a complete token -- no spaces are skipped.
-constexpr auto signedDigitString = CharMatch<'-'>{} >>
- applyFunction(negate, digitString) ||
- maybe(CharMatch<'+'>{}) >> digitString;
-
// R712 sign -> + | -
// Not a complete token.
-constexpr auto sign = CharMatch<'+'>{} >> pure(Sign::Positive) ||
- CharMatch<'-'>{} >> pure(Sign::Negative);
+constexpr auto sign = "+"_ch >> pure(Sign::Positive) ||
+ "-"_ch >> pure(Sign::Negative);
// R713 signed-real-literal-constant -> [sign] real-literal-constant
constexpr auto signedRealLiteralConstant = spaces >>
@@ -758,37 +742,23 @@
// significand [exponent-letter exponent] [_ kind-param] |
// digit-string exponent-letter exponent [_ kind-param]
// R715 significand -> digit-string . [digit-string] | . digit-string
-// N.B. Preceding spaces are not skipped.
-TYPE_CONTEXT_PARSER("REAL literal constant"_en_US,
- construct<RealLiteralConstant>{}(some(digit),
- CharMatch<'.'>{} >>
- !(some(letter) >> CharMatch<'.'>{}) >> // don't misinterpret 1.AND.
- many(digit),
- maybe(exponentPart), maybe(underscore >> kindParam)) ||
- construct<RealLiteralConstant>{}(CharMatch<'.'>{} >> some(digit),
- maybe(exponentPart), maybe(underscore >> kindParam)) ||
- construct<RealLiteralConstant>{}(
- some(digit), exponentPart, maybe(underscore >> kindParam)))
-
// R716 exponent-letter -> E | D
// Extension: Q
-// Not a complete token.
-inline constexpr bool isEorD(char ch) {
- ch = ToLowerCaseLetter(ch);
- return ch == 'e' || ch == 'd';
-}
-
-inline constexpr bool isQ(char ch) { return ToLowerCaseLetter(ch) == 'q'; }
-
-constexpr CharPredicateGuardParser exponentEorD{
- isEorD, "expected exponent letter"_en_US};
-constexpr CharPredicateGuardParser exponentQ{
- isQ, "expected exponent letter"_en_US};
-
// R717 exponent -> signed-digit-string
-// Not a complete token.
-TYPE_PARSER(construct<ExponentPart>{}(
- extension(exponentQ) || exponentEorD, signedDigitString))
+// N.B. Preceding spaces are not skipped.
+constexpr auto exponentPart =
+ ("ed"_ch || extension("q"_ch)) >> signedDigitString;
+
+TYPE_CONTEXT_PARSER("REAL literal constant"_en_US,
+ construct<RealLiteralConstant>{}(
+ sourced(
+ (digitString >> "."_ch >>
+ !(some(letter) >> "."_ch /* don't misinterpret 1.AND. */) >>
+ maybe(digitString) >> maybe(exponentPart) >> ok ||
+ "."_ch >> digitString >> maybe(exponentPart) >> ok ||
+ digitString >> exponentPart >> ok) >>
+ construct<RealLiteralConstant::Real>{}),
+ maybe(underscore >> kindParam)))
// R718 complex-literal-constant -> ( real-part , imag-part )
TYPE_CONTEXT_PARSER("COMPLEX literal constant"_en_US,
@@ -844,8 +814,7 @@
// N.B. charLiteralConstantWithoutKind does not skip preceding spaces.
// N.B. the parsing of "name" takes care to not consume the '_'.
constexpr auto charLiteralConstantWithoutKind =
- CharMatch<'\''>{} >> CharLiteral<'\''>{} ||
- CharMatch<'"'>{} >> CharLiteral<'"'>{};
+ "'"_ch >> CharLiteral<'\''>{} || "\""_ch >> CharLiteral<'"'>{};
TYPE_CONTEXT_PARSER("CHARACTER literal constant"_en_US,
construct<CharLiteralConstant>{}(
@@ -1161,7 +1130,7 @@
optionalListBeforeColons(Parser<AttrSpec>{}),
nonemptyList(entityDecl)) ||
// PGI-only extension: don't require the colons
- // TODO: The standard requires the colons if the entity
+ // N.B.: The standard requires the colons if the entity
// declarations contain initializers.
extension(construct<TypeDeclarationStmt>{}(declarationTypeSpec,
defaulted("," >> nonemptyList(Parser<AttrSpec>{})),
@@ -1205,7 +1174,7 @@
maybe(coarraySpec), maybe("*" >> charLength), maybe(initialization)))
// R806 null-init -> function-reference
-// TODO: confirm that NULL still intrinsic
+// TODO: confirm in semantics that NULL is still intrinsic in this scope
TYPE_PARSER("NULL ( )" >> construct<NullInit>{})
// R807 access-spec -> PUBLIC | PRIVATE
@@ -1503,8 +1472,8 @@
// R865 letter-spec -> letter [- letter]
TYPE_PARSER(spaces >> (construct<LetterSpec>{}(letter, maybe("-" >> letter)) ||
- construct<LetterSpec>{}(extension(otherIdCharacter),
- construct<std::optional<char>>{})))
+ construct<LetterSpec>{}(otherIdChar,
+ construct<std::optional<const char *>>{})))
// R867 import-stmt ->
// IMPORT [[::] import-name-list] |
@@ -1661,8 +1630,10 @@
// R1023 defined-binary-op -> . letter [letter]... .
// R1414 local-defined-operator -> defined-unary-op | defined-binary-op
// R1415 use-defined-operator -> defined-unary-op | defined-binary-op
-TYPE_PARSER(construct<DefinedOpName>{}(applyFunction(listToString,
- spaces >> CharMatch<'.'>{} >> some(letter) / CharMatch<'.'>{})))
+// N.B. The name of the operator is captured without the periods around it.
+TYPE_PARSER(spaces >> "."_ch >>
+ construct<DefinedOpName>{}(sourced(some(letter) >> construct<Name>{})) /
+ "."_ch)
// R911 data-ref -> part-ref [% part-ref]...
// R914 coindexed-named-object -> data-ref
@@ -1672,7 +1643,7 @@
static std::optional<Success> Parse(ParseState *state) {
if (std::optional<DefinedOpName> n{definedOpName.Parse(state)}) {
if (const auto *user = state->userState()) {
- if (user->IsDefinedOperator(n->v)) {
+ if (user->IsDefinedOperator(n->v.source)) {
return {Success{}};
}
}
@@ -2084,7 +2055,7 @@
// R1017 level-5-expr -> [level-5-expr equiv-op] equiv-operand
// R1021 equiv-op -> .EQV. | .NEQV.
// Logical equivalence is left-associative.
-// Extension: .XOR. as synonym for .NEQV. (TODO: is this the right precedence?)
+// Extension: .XOR. as synonym for .NEQV.
constexpr struct Level5Expr {
using resultType = Expr;
constexpr Level5Expr() {}
@@ -3423,7 +3394,7 @@
if (op.has_value()) {
if (auto ustate = state->userState()) {
if (const auto *name = std::get_if<DefinedOpName>(&op->u)) {
- ustate->NoteDefinedOperator(name->v);
+ ustate->NoteDefinedOperator(name->v.source);
}
}
}
@@ -3726,7 +3697,7 @@
// R1221 dtv-type-spec -> TYPE ( derived-type-spec ) |
// CLASS ( derived-type-spec )
//
-// There requirement productions are defined and used, but need not be
+// These requirement productions are defined and used, but need not be
// defined independently here in this file:
// R771 lbracket -> [
// R772 rbracket -> ]
diff --git a/flang/lib/parser/interval.h b/flang/lib/parser/interval.h
new file mode 100644
index 0000000..fc238b2
--- /dev/null
+++ b/flang/lib/parser/interval.h
@@ -0,0 +1,74 @@
+#ifndef FORTRAN_PARSER_INTERVAL_H_
+#define FORTRAN_PARSER_INTERVAL_H_
+
+// Defines a generalized template class Interval<A> to represent
+// the half-open interval [x .. x+n).
+
+#include "idioms.h"
+#include <algorithm>
+#include <cstddef>
+#include <utility>
+
+namespace Fortran {
+namespace parser {
+
+// Interval<A> represents the half-open interval [start .. start+size) of
+// positions of type A.  The members below require A to be default-
+// constructible and to support A + std::size_t, A - A, <=, <, and ==.
+template<typename A> class Interval {
+public:
+  using type = A;
+  Interval() {}
+  Interval(const A &s, std::size_t n = 1) : start_{s}, size_{n} {}
+  Interval(A &&s, std::size_t n = 1) : start_{std::move(s)}, size_{n} {}
+  Interval(const Interval &) = default;
+  Interval(Interval &&) = default;
+  Interval &operator=(const Interval &) = default;
+  Interval &operator=(Interval &&) = default;
+
+  bool operator==(const Interval &that) const {
+    return start_ == that.start_ && size_ == that.size_;
+  }
+
+  const A &start() const { return start_; }
+  std::size_t size() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  // Is the position x a member of this interval?
+  bool Contains(const A &x) const { return start_ <= x && x < start_ + size_; }
+  // Are the first and last members of "that" both members of this interval?
+  // An empty "that" has no last member, so only its start is tested; this
+  // avoids computing start + (0 - 1), whose offset wraps around.
+  bool Contains(const Interval &that) const {
+    return Contains(that.start_) &&
+        (that.empty() || Contains(that.start_ + (that.size_ - 1)));
+  }
+  // Does "that" begin exactly where this interval ends?
+  bool ImmediatelyPrecedes(const Interval &that) const {
+    return NextAfter() == that.start_;
+  }
+  // Resizes this interval so that it ends where "that" ends.
+  void Annex(const Interval &that) {
+    size_ = (that.start_ + that.size_) - start_;
+  }
+  // Appends "that" only when it directly follows this interval; returns
+  // whether it was appended.
+  bool AnnexIfPredecessor(const Interval &that) {
+    if (ImmediatelyPrecedes(that)) {
+      size_ += that.size_;
+      return true;
+    }
+    return false;
+  }
+
+  // Offset of the member x from the start; x is CHECKed to be a member.
+  std::size_t MemberOffset(const A &x) const {
+    CHECK(Contains(x));
+    return x - start_;
+  }
+  // The member at offset n from the start; n is CHECKed to be in range.
+  A OffsetMember(std::size_t n) const {
+    CHECK(n < size_);
+    return start_ + n;
+  }
+
+  // Last() is meaningful only for a non-empty interval: with size 0 the
+  // offset size - 1 wraps around.
+  A Last() const { return start_ + (size_ - 1); }
+  A NextAfter() const { return start_ + size_; }
+  // The leading part of this interval, at most n positions long.
+  Interval Prefix(std::size_t n) const { return {start_, std::min(size_, n)}; }
+  // What remains after dropping the first n positions; n is CHECKed.
+  Interval Suffix(std::size_t n) const {
+    CHECK(n <= size_);
+    return {start_ + n, size_ - n};
+  }
+
+private:
+  A start_{};  // value-initialized so a default Interval has a defined start
+  std::size_t size_{0};
+};
+} // namespace parser
+} // namespace Fortran
+#endif // FORTRAN_PARSER_INTERVAL_H_
diff --git a/flang/lib/parser/message.cc b/flang/lib/parser/message.cc
index 85c629f..0552753 100644
--- a/flang/lib/parser/message.cc
+++ b/flang/lib/parser/message.cc
@@ -1,5 +1,6 @@
#include "message.h"
#include <cstdarg>
+#include <cstddef>
#include <cstdio>
#include <cstring>
@@ -7,7 +8,7 @@
namespace parser {
std::ostream &operator<<(std::ostream &o, const MessageFixedText &t) {
- for (size_t j{0}; j < t.size(); ++j) {
+ for (std::size_t j{0}; j < t.size(); ++j) {
o << t.str()[j];
}
return o;
@@ -41,7 +42,7 @@
if (chars[1] == '\0') {
// one-time initialization of array used for permanant single-byte string
// pointers
- for (size_t j{0}; j < sizeof chars; ++j) {
+ for (std::size_t j{0}; j < sizeof chars; ++j) {
chars[j] = j;
}
}
diff --git a/flang/lib/parser/message.h b/flang/lib/parser/message.h
index b586645..4a6fbd0 100644
--- a/flang/lib/parser/message.h
+++ b/flang/lib/parser/message.h
@@ -6,6 +6,7 @@
#include "idioms.h"
#include "provenance.h"
+#include <cstddef>
#include <forward_list>
#include <memory>
#include <optional>
@@ -19,7 +20,7 @@
class MessageFixedText {
public:
MessageFixedText() {}
- constexpr MessageFixedText(const char str[], size_t n)
+ constexpr MessageFixedText(const char str[], std::size_t n)
: str_{str}, bytes_{n} {}
constexpr MessageFixedText(const MessageFixedText &) = default;
MessageFixedText(MessageFixedText &&) = default;
@@ -27,17 +28,17 @@
MessageFixedText &operator=(MessageFixedText &&) = default;
const char *str() const { return str_; }
- size_t size() const { return bytes_; }
+ std::size_t size() const { return bytes_; }
bool empty() const { return bytes_ == 0; }
std::string ToString() const;
private:
const char *str_{nullptr};
- size_t bytes_{0};
+ std::size_t bytes_{0};
};
-constexpr MessageFixedText operator""_en_US(const char str[], size_t n) {
+constexpr MessageFixedText operator""_en_US(const char str[], std::size_t n) {
return MessageFixedText{str, n};
}
@@ -55,14 +56,14 @@
// Represents a formatted rendition of "expected '%s'"_en_US on a constant text.
class MessageExpectedText {
public:
- MessageExpectedText(const char *s, size_t n) : str_{s}, bytes_{n} {}
+ MessageExpectedText(const char *s, std::size_t n) : str_{s}, bytes_{n} {}
explicit MessageExpectedText(char ch) : singleton_{ch} {}
MessageFixedText AsMessageFixedText() const;
private:
const char *str_{nullptr};
char singleton_;
- size_t bytes_{1};
+ std::size_t bytes_{1};
};
class Message;
@@ -132,7 +133,7 @@
const AllSources &allSources() const { return allSources_; }
Message &Put(Message &&m) {
- CHECK(m.provenance() < allSources_.size());
+ CHECK(allSources_.IsValid(m.provenance()));
if (messages_.empty()) {
messages_.emplace_front(std::move(m));
last_ = messages_.begin();
diff --git a/flang/lib/parser/parse-state.h b/flang/lib/parser/parse-state.h
index abed103..a6e6915 100644
--- a/flang/lib/parser/parse-state.h
+++ b/flang/lib/parser/parse-state.h
@@ -11,6 +11,7 @@
#include "idioms.h"
#include "message.h"
#include "provenance.h"
+#include <cstddef>
#include <cstring>
#include <list>
#include <memory>
@@ -53,7 +54,7 @@
}
void swap(ParseState &that) {
- constexpr size_t bytes{sizeof *this};
+ constexpr std::size_t bytes{sizeof *this};
char buffer[bytes];
std::memcpy(buffer, this, bytes);
std::memcpy(this, &that, bytes);
diff --git a/flang/lib/parser/parse-tree-visitor.h b/flang/lib/parser/parse-tree-visitor.h
index d89dc96..2760809 100644
--- a/flang/lib/parser/parse-tree-visitor.h
+++ b/flang/lib/parser/parse-tree-visitor.h
@@ -2,6 +2,7 @@
#define FORTRAN_PARSER_PARSE_TREE_VISITOR_H_
#include "parse-tree.h"
+#include <cstddef>
#include <optional>
#include <tuple>
#include <variant>
@@ -17,7 +18,7 @@
namespace Fortran {
namespace parser {
-// Default case for visitation of non-class data members (and strings)
+// Default case for visitation of non-class data members and strings
template<typename A, typename V>
typename std::enable_if<!std::is_class_v<A> ||
std::is_same_v<std::string, A>>::type
@@ -45,7 +46,7 @@
Walk(elem, visitor);
}
}
-template<size_t I = 0, typename Func, typename T>
+template<std::size_t I = 0, typename Func, typename T>
void ForEachInTuple(const T &tuple, Func func) {
if constexpr (I < std::tuple_size_v<T>) {
func(std::get<I>(tuple));
@@ -130,6 +131,12 @@
}
}
+// A Name has no members that are walked: the visitor sees Pre(x) and, only
+// when Pre(x) returns true, Post(x).
+template<typename V> void Walk(const Name &x, V &visitor) {
+  if (!visitor.Pre(x)) {
+    return;
+  }
+  visitor.Post(x);
+}
+
template<typename V> void Walk(const AcSpec &x, V &visitor) {
if (visitor.Pre(x)) {
Walk(x.type, visitor);
@@ -240,13 +247,16 @@
}
template<typename V> void Walk(const RealLiteralConstant &x, V &visitor) {
if (visitor.Pre(x)) {
- Walk(x.intPart, visitor);
- Walk(x.fraction, visitor);
- Walk(x.exponent, visitor);
+ Walk(x.real, visitor);
Walk(x.kind, visitor);
visitor.Post(x);
}
}
+// RealLiteralConstant::Real has no members that are walked: the visitor
+// sees Pre(x) and, only when Pre(x) returns true, Post(x).
+template<typename V> void Walk(const RealLiteralConstant::Real &x, V &visitor) {
+  if (!visitor.Pre(x)) {
+    return;
+  }
+  visitor.Post(x);
+}
template<typename V> void Walk(const StructureComponent &x, V &visitor) {
if (visitor.Pre(x)) {
Walk(x.base, visitor);
diff --git a/flang/lib/parser/parse-tree.cc b/flang/lib/parser/parse-tree.cc
index cf98176..315edef 100644
--- a/flang/lib/parser/parse-tree.cc
+++ b/flang/lib/parser/parse-tree.cc
@@ -6,33 +6,6 @@
namespace Fortran {
namespace parser {
-// R714 real-literal-constant
-// R715 significand
-static std::string charListToString(std::list<char> &&cs) {
- std::string result;
- for (auto ch : cs) {
- result += ch;
- }
- return result;
-}
-
-RealLiteralConstant::RealLiteralConstant(std::list<char> &&i,
- std::list<char> &&f, std::optional<ExponentPart> &&expo,
- std::optional<KindParam> &&k)
- : intPart{charListToString(std::move(i))}, fraction{charListToString(
- std::move(f))},
- exponent(std::move(expo)), kind(std::move(k)) {}
-
-RealLiteralConstant::RealLiteralConstant(std::list<char> &&f,
- std::optional<ExponentPart> &&expo, std::optional<KindParam> &&k)
- : fraction{charListToString(std::move(f))}, exponent(std::move(expo)),
- kind(std::move(k)) {}
-
-RealLiteralConstant::RealLiteralConstant(
- std::list<char> &&i, ExponentPart &&expo, std::optional<KindParam> &&k)
- : intPart{charListToString(std::move(i))}, exponent(std::move(expo)),
- kind(std::move(k)) {}
-
// R867
ImportStmt::ImportStmt(Kind &&k, std::list<Name> &&n)
: kind{k}, names(std::move(n)) {
@@ -67,7 +40,7 @@
},
[](Substring &) -> ProcedureDesignator {
CHECK(!"can't get here");
- return {Name{""}};
+ return {Name{}};
}},
u);
}
@@ -158,7 +131,7 @@
},
[](SubscriptTriplet &) -> ActualArg {
CHECK(!"can't happen");
- return {Name{"bad"}};
+ return {Name{}};
}},
u);
}
diff --git a/flang/lib/parser/parse-tree.h b/flang/lib/parser/parse-tree.h
index e5bab7f..0caf8d6 100644
--- a/flang/lib/parser/parse-tree.h
+++ b/flang/lib/parser/parse-tree.h
@@ -9,6 +9,8 @@
// run-time I/O support library have been isolated into a distinct header file
// (viz., format-specification.h).
+#include "char-block.h"
+#include "characters.h"
#include "format-specification.h"
#include "idioms.h"
#include "indirection.h"
@@ -39,14 +41,19 @@
CLASS_TRAIT(TupleTrait);
// Most non-template classes in this file use these default definitions
-// for their move constructor and move assignment operator=.
-#define BOILERPLATE(classname) \
+// for their move constructor and move assignment operator=, and disable
+// their copy constructor and copy assignment operator=.
+#define COPY_AND_ASSIGN_BOILERPLATE(classname) \
classname(classname &&) = default; \
classname &operator=(classname &&) = default; \
- classname() = delete; \
classname(const classname &) = delete; \
classname &operator=(const classname &) = delete
+// Almost all classes in this file have no default constructor.
+#define BOILERPLATE(classname) \
+ COPY_AND_ASSIGN_BOILERPLATE(classname); \
+ classname() = delete
+
// Empty classes are often used below as alternatives in std::variant<>
// discriminated unions.
#define EMPTY_CLASS(classname) \
@@ -229,8 +236,10 @@
struct AssignedGotoStmt;
struct PauseStmt;
+// Cooked character stream locations
+using Location = const char *;
+
// Implicit definitions of the Standard
-using Keyword = std::string;
// R403 scalar-xyz -> xyz
// These template class wrappers correspond to the Standard's modifiers
@@ -290,10 +299,10 @@
// A wrapper for xzy-stmt productions that are statements, so that
// source provenances and labels have a uniform representation.
template<typename A> struct Statement {
- Statement(Provenance &&at, std::optional<long> &&lab, bool &&accept, A &&s)
- : provenance(at), label(std::move(lab)), isLabelInAcceptableField{accept},
+ Statement(std::optional<long> &&lab, bool &&accept, A &&s)
+ : label(std::move(lab)), isLabelInAcceptableField{accept},
statement(std::move(s)) {}
- Provenance provenance;
+ CharBlock source;
std::optional<Label> label;
bool isLabelInAcceptableField{true};
A statement;
@@ -479,7 +488,15 @@
WRAPPER_CLASS(Program, std::list<ProgramUnit>);
// R603 name -> letter [alphanumeric-character]...
-using Name = std::string;
+struct Name {
+ Name() {} // default-constructible; source is default-constructed
+ COPY_AND_ASSIGN_BOILERPLATE(Name); // defaulted moves, deleted copies
+ CharBlock source;
+ // TODO: pointer to symbol table entity
+};
+
+// R516 keyword -> name
+WRAPPER_CLASS(Keyword, Name);
// R606 named-constant -> name
WRAPPER_CLASS(NamedConstant, Name);
@@ -671,6 +688,7 @@
// R707 signed-int-literal-constant -> [sign] int-literal-constant
struct SignedIntLiteralConstant {
TUPLE_CLASS_BOILERPLATE(SignedIntLiteralConstant);
+ CharBlock source;
std::tuple<std::int64_t, std::optional<KindParam>> t;
};
@@ -684,27 +702,21 @@
// R712 sign -> + | -
enum class Sign { Positive, Negative };
-// R717 exponent -> signed-digit-string
-struct ExponentPart {
- TUPLE_CLASS_BOILERPLATE(ExponentPart);
- std::tuple<char, std::int64_t> t;
-};
-
// R714 real-literal-constant ->
// significand [exponent-letter exponent] [_ kind-param] |
// digit-string exponent-letter exponent [_ kind-param]
// R715 significand -> digit-string . [digit-string] | . digit-string
+// R717 exponent -> signed-digit-string
struct RealLiteralConstant {
BOILERPLATE(RealLiteralConstant);
- RealLiteralConstant(std::list<char> &&, std::list<char> &&,
- std::optional<ExponentPart> &&, std::optional<KindParam> &&);
- RealLiteralConstant(std::list<char> &&, std::optional<ExponentPart> &&,
- std::optional<KindParam> &&);
- RealLiteralConstant(
- std::list<char> &&, ExponentPart &&, std::optional<KindParam> &&);
- std::string intPart;
- std::string fraction;
- std::optional<ExponentPart> exponent;
+ struct Real {
+ COPY_AND_ASSIGN_BOILERPLATE(Real);
+ Real() {}
+ CharBlock source;
+ };
+ RealLiteralConstant(Real &&r, std::optional<KindParam> &&k)
+ : real{std::move(r)}, kind{std::move(k)} {}
+ Real real;
std::optional<KindParam> kind;
};
@@ -1436,7 +1448,7 @@
// R865 letter-spec -> letter [- letter]
struct LetterSpec {
TUPLE_CLASS_BOILERPLATE(LetterSpec);
- std::tuple<char, std::optional<char>> t;
+ std::tuple<Location, std::optional<Location>> t;
};
// R864 implicit-spec -> declaration-type-spec ( letter-spec-list )
diff --git a/flang/lib/parser/parsing.cc b/flang/lib/parser/parsing.cc
index bc3f7ba..ab4bafd 100644
--- a/flang/lib/parser/parsing.cc
+++ b/flang/lib/parser/parsing.cc
@@ -22,6 +22,12 @@
anyFatalError_ = true;
return false;
}
+ if (sourceFile->bytes() == 0) {
+ ProvenanceRange range{allSources_.AddCompilerInsertion(path)};
+ messages_.Put(Message{range.start(), "file is empty"_en_US});
+ anyFatalError_ = true;
+ return false;
+ }
for (const auto &path : options.searchDirectories) {
allSources_.PushSearchPathDirectory(path);
@@ -40,7 +46,8 @@
.set_fixedFormColumnLimit(options.fixedFormColumns)
.set_encoding(options.encoding)
.set_enableBackslashEscapesInCharLiterals(options.enableBackslashEscapes)
- .set_enableOldDebugLines(options.enableOldDebugLines);
+ .set_enableOldDebugLines(options.enableOldDebugLines)
+ .AddCompilerDirectiveSentinel("dir$");
ProvenanceRange range{
allSources_.AddIncludedFile(*sourceFile, ProvenanceRange{})};
anyFatalError_ = !prescanner.Prescan(range);
diff --git a/flang/lib/parser/preprocessor.cc b/flang/lib/parser/preprocessor.cc
index a8a057ea..ac23bca 100644
--- a/flang/lib/parser/preprocessor.cc
+++ b/flang/lib/parser/preprocessor.cc
@@ -5,6 +5,7 @@
#include "prescan.h"
#include <algorithm>
#include <cinttypes>
+#include <cstddef>
#include <ctime>
#include <map>
#include <memory>
@@ -17,11 +18,11 @@
namespace parser {
Definition::Definition(
- const TokenSequence &repl, size_t firstToken, size_t tokens)
+ const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
: replacement_{Tokenize({}, repl, firstToken, tokens)} {}
Definition::Definition(const std::vector<std::string> &argNames,
- const TokenSequence &repl, size_t firstToken, size_t tokens,
+ const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
bool isVariadic)
: isFunctionLike_{true},
argumentCount_(argNames.size()), isVariadic_{isVariadic},
@@ -38,12 +39,12 @@
return was;
}
-static bool IsLegalIdentifierStart(const ContiguousChars &cpl) {
+static bool IsLegalIdentifierStart(const CharBlock &cpl) {
return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
- const TokenSequence &token, size_t firstToken, size_t tokens) {
+ const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
std::map<std::string, std::string> args;
char argIndex{'A'};
for (const std::string &arg : argNames) {
@@ -51,8 +52,8 @@
args[arg] = "~"s + argIndex++;
}
TokenSequence result;
- for (size_t j{0}; j < tokens; ++j) {
- ContiguousChars tok{token[firstToken + j]};
+ for (std::size_t j{0}; j < tokens; ++j) {
+ CharBlock tok{token[firstToken + j]};
if (IsLegalIdentifierStart(tok)) {
auto it = args.find(tok.ToString());
if (it != args.end()) {
@@ -65,8 +66,8 @@
return result;
}
-static size_t AfterLastNonBlank(const TokenSequence &tokens) {
- for (size_t j{tokens.size()}; j > 0; --j) {
+static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
+ for (std::size_t j{tokens.size()}; j > 0; --j) {
if (!tokens[j - 1].IsBlank()) {
return j;
}
@@ -79,10 +80,10 @@
TokenSequence result;
Provenance quoteProvenance{allSources->CompilerInsertionProvenance('"')};
result.PutNextTokenChar('"', quoteProvenance);
- for (size_t j{0}; j < tokens.size(); ++j) {
- const ContiguousChars &token{tokens[j]};
- size_t bytes{token.size()};
- for (size_t k{0}; k < bytes; ++k) {
+ for (std::size_t j{0}; j < tokens.size(); ++j) {
+ const CharBlock &token{tokens[j]};
+ std::size_t bytes{token.size()};
+ for (std::size_t k{0}; k < bytes; ++k) {
char ch{token[k]};
Provenance from{tokens.GetTokenProvenance(j, k)};
if (ch == '"' || ch == '\\') {
@@ -102,10 +103,10 @@
bool pasting{false};
bool skipping{false};
int parenthesesNesting{0};
- size_t tokens{replacement_.size()};
- for (size_t j{0}; j < tokens; ++j) {
- const ContiguousChars &token{replacement_[j]};
- size_t bytes{token.size()};
+ std::size_t tokens{replacement_.size()};
+ for (std::size_t j{0}; j < tokens; ++j) {
+ const CharBlock &token{replacement_[j]};
+ std::size_t bytes{token.size()};
if (skipping) {
if (bytes == 1) {
if (token[0] == '(') {
@@ -117,11 +118,11 @@
continue;
}
if (bytes == 2 && token[0] == '~') {
- size_t index = token[1] - 'A';
+ std::size_t index = token[1] - 'A';
if (index >= args.size()) {
continue;
}
- size_t afterLastNonBlank{AfterLastNonBlank(result)};
+ std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
if (afterLastNonBlank > 0 &&
result[afterLastNonBlank - 1].ToString() == "#") {
// stringifying
@@ -130,8 +131,8 @@
}
result.Put(Stringify(args[index], allSources));
} else {
- size_t argTokens{args[index].size()};
- for (size_t k{0}; k < argTokens; ++k) {
+ std::size_t argTokens{args[index].size()};
+ for (std::size_t k{0}; k < argTokens; ++k) {
if (!pasting || !args[index][k].IsBlank()) {
result.Put(args[index], k);
pasting = false;
@@ -153,7 +154,7 @@
} else if (bytes == 11 && isVariadic_ &&
token.ToString() == "__VA_ARGs__") {
Provenance commaProvenance{allSources->CompilerInsertionProvenance(',')};
- for (size_t k{argumentCount_}; k < args.size(); ++k) {
+ for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
if (k > argumentCount_) {
result.Put(","s, commaProvenance);
}
@@ -211,10 +212,10 @@
bool Preprocessor::MacroReplacement(const TokenSequence &input,
const Prescanner &prescanner, TokenSequence *result) {
// Do quick scan for any use of a defined name.
- size_t tokens{input.size()};
- size_t j;
+ std::size_t tokens{input.size()};
+ std::size_t j;
for (j = 0; j < tokens; ++j) {
- size_t bytes{input[j].size()};
+ std::size_t bytes{input[j].size()};
if (bytes > 0 && IsLegalIdentifierStart(input[j][0]) &&
IsNameDefined(input[j])) {
break;
@@ -225,7 +226,7 @@
}
result->Put(input, 0, j);
for (; j < tokens; ++j) {
- const ContiguousChars &token{input[j]};
+ const CharBlock &token{input[j]};
if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
result->Put(input, j);
continue;
@@ -274,10 +275,10 @@
}
// Possible function-like macro call. Skip spaces and newlines to see
// whether '(' is next.
- size_t k{j};
+ std::size_t k{j};
bool leftParen{false};
while (++k < tokens) {
- const ContiguousChars &lookAhead{input[k]};
+ const CharBlock &lookAhead{input[k]};
if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
break;
@@ -287,7 +288,7 @@
result->Put(input, j);
continue;
}
- std::vector<size_t> argStart{++k};
+ std::vector<std::size_t> argStart{++k};
for (int nesting{0}; k < tokens; ++k) {
if (input[k].size() == 1) {
char ch{input[k][0]};
@@ -309,9 +310,10 @@
continue;
}
std::vector<TokenSequence> args;
- for (size_t n{0}; n < argStart.size(); ++n) {
- size_t at{argStart[n]};
- size_t count{(n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
+ for (std::size_t n{0}; n < argStart.size(); ++n) {
+ std::size_t at{argStart[n]};
+ std::size_t count{
+ (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
args.emplace_back(TokenSequence(input, at, count));
}
def.set_isDisabled(true);
@@ -336,8 +338,8 @@
return MacroReplacement(tokens, prescanner, &repl) ? repl : tokens;
}
-static size_t SkipBlanks(
- const TokenSequence &tokens, size_t at, size_t lastToken) {
+static std::size_t SkipBlanks(
+ const TokenSequence &tokens, std::size_t at, std::size_t lastToken) {
for (; at < lastToken; ++at) {
if (!tokens[at].IsBlank()) {
break;
@@ -347,9 +349,9 @@
}
static TokenSequence StripBlanks(
- const TokenSequence &token, size_t first, size_t tokens) {
+ const TokenSequence &token, std::size_t first, std::size_t tokens) {
TokenSequence noBlanks;
- for (size_t j{SkipBlanks(token, first, tokens)}; j < tokens;
+ for (std::size_t j{SkipBlanks(token, first, tokens)}; j < tokens;
j = SkipBlanks(token, j + 1, tokens)) {
noBlanks.Put(token, j);
}
@@ -357,8 +359,8 @@
}
void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
- size_t tokens{dir.size()};
- size_t j{SkipBlanks(dir, 0, tokens)};
+ std::size_t tokens{dir.size()};
+ std::size_t j{SkipBlanks(dir, 0, tokens)};
if (j == tokens) {
return;
}
@@ -373,10 +375,10 @@
if (IsDecimalDigit(dir[j][0]) || dir[j][0] == '"') {
return; // TODO: treat as #line
}
- size_t dirOffset{j};
+ std::size_t dirOffset{j};
std::string dirName{ToLowerCaseLetters(dir[dirOffset].ToString())};
j = SkipBlanks(dir, j + 1, tokens);
- ContiguousChars nameToken;
+ CharBlock nameToken;
if (j < tokens && IsLegalIdentifierStart(dir[j][0])) {
nameToken = dir[j];
}
@@ -570,6 +572,9 @@
dir.GetTokenProvenance(dirOffset));
return;
}
+ if (included->bytes() == 0) {
+ return;
+ }
ProvenanceRange fileRange{
allSources_->AddIncludedFile(*included, dir.GetProvenanceRange())};
if (!Prescanner{*prescanner}.Prescan(fileRange)) {
@@ -583,18 +588,19 @@
}
}
-ContiguousChars Preprocessor::SaveTokenAsName(const ContiguousChars &t) {
+CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
names_.push_back(t.ToString());
return {names_.back().data(), names_.back().size()};
}
-bool Preprocessor::IsNameDefined(const ContiguousChars &token) {
+bool Preprocessor::IsNameDefined(const CharBlock &token) {
return definitions_.find(token) != definitions_.end();
}
-static std::string GetDirectiveName(const TokenSequence &line, size_t *rest) {
- size_t tokens{line.size()};
- size_t j{SkipBlanks(line, 0, tokens)};
+static std::string GetDirectiveName(
+ const TokenSequence &line, std::size_t *rest) {
+ std::size_t tokens{line.size()};
+ std::size_t j{SkipBlanks(line, 0, tokens)};
if (j == tokens || line[j].ToString() != "#") {
*rest = tokens;
return "";
@@ -617,7 +623,7 @@
continue;
}
TokenSequence line{prescanner->TokenizePreprocessorDirective()};
- size_t rest{0};
+ std::size_t rest{0};
std::string dn{GetDirectiveName(line, &rest)};
if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
++nesting;
@@ -660,7 +666,8 @@
// 1: ? :
// 0: ,
static std::int64_t ExpressionValue(const TokenSequence &token,
- int minimumPrecedence, size_t *atToken, std::optional<Message> *error) {
+ int minimumPrecedence, std::size_t *atToken,
+ std::optional<Message> *error) {
enum Operator {
PARENS,
CONST,
@@ -736,7 +743,7 @@
opNameMap[","] = COMMA;
}
- size_t tokens{token.size()};
+ std::size_t tokens{token.size()};
if (*atToken >= tokens) {
*error = Message{
token.GetTokenProvenance(tokens - 1), "incomplete expression"_en_US};
@@ -744,7 +751,7 @@
}
// Parse and evaluate a primary or a unary operator and its operand.
- size_t opAt{*atToken};
+ std::size_t opAt{*atToken};
std::string t{token[opAt].ToString()};
enum Operator op;
std::int64_t left{0};
@@ -752,7 +759,7 @@
op = PARENS;
} else if (IsDecimalDigit(t[0])) {
op = CONST;
- size_t consumed{0};
+ std::size_t consumed{0};
left = std::stoll(t, &consumed, 0 /*base to be detected*/);
if (consumed < t.size()) {
*error = Message{token.GetTokenProvenance(opAt),
@@ -940,13 +947,13 @@
return 0; // silence compiler warning
}
-bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr, size_t first,
- size_t exprTokens, Prescanner *prescanner) {
+bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
+ std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
TokenSequence expr1{StripBlanks(expr, first, first + exprTokens)};
TokenSequence expr2;
- for (size_t j{0}; j < expr1.size(); ++j) {
+ for (std::size_t j{0}; j < expr1.size(); ++j) {
if (ToLowerCaseLetters(expr1[j].ToString()) == "defined") {
- ContiguousChars name;
+ CharBlock name;
if (j + 3 < expr1.size() && expr1[j + 1].ToString() == "(" &&
expr1[j + 3].ToString() == ")") {
name = expr1[j + 2];
@@ -964,7 +971,7 @@
}
TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
TokenSequence expr4{StripBlanks(expr3, 0, expr3.size())};
- size_t atToken{0};
+ std::size_t atToken{0};
std::optional<Message> error;
bool result{ExpressionValue(expr4, 0, &atToken, &error) != 0};
if (error.has_value()) {
diff --git a/flang/lib/parser/preprocessor.h b/flang/lib/parser/preprocessor.h
index 7b6d1a5..2a67944 100644
--- a/flang/lib/parser/preprocessor.h
+++ b/flang/lib/parser/preprocessor.h
@@ -7,8 +7,10 @@
// performed, so that special compiler command options &/or source file name
// extensions for preprocessing will not be necessary.
+#include "char-block.h"
#include "provenance.h"
#include "token-sequence.h"
+#include <cstddef>
#include <list>
#include <stack>
#include <string>
@@ -23,13 +25,13 @@
// Defines a macro
class Definition {
public:
- Definition(const TokenSequence &, size_t firstToken, size_t tokens);
+ Definition(const TokenSequence &, std::size_t firstToken, std::size_t tokens);
Definition(const std::vector<std::string> &argNames, const TokenSequence &,
- size_t firstToken, size_t tokens, bool isVariadic = false);
+ std::size_t firstToken, std::size_t tokens, bool isVariadic = false);
Definition(const std::string &predefined, AllSources *);
bool isFunctionLike() const { return isFunctionLike_; }
- size_t argumentCount() const { return argumentCount_; }
+ std::size_t argumentCount() const { return argumentCount_; }
bool isVariadic() const { return isVariadic_; }
bool isDisabled() const { return isDisabled_; }
bool isPredefined() const { return isPredefined_; }
@@ -41,10 +43,10 @@
private:
static TokenSequence Tokenize(const std::vector<std::string> &argNames,
- const TokenSequence &token, size_t firstToken, size_t tokens);
+ const TokenSequence &token, std::size_t firstToken, std::size_t tokens);
bool isFunctionLike_{false};
- size_t argumentCount_{0};
+ std::size_t argumentCount_{0};
bool isVariadic_{false};
bool isDisabled_{false};
bool isPredefined_{false};
@@ -73,17 +75,17 @@
enum class IsElseActive { No, Yes };
enum class CanDeadElseAppear { No, Yes };
- ContiguousChars SaveTokenAsName(const ContiguousChars &);
- bool IsNameDefined(const ContiguousChars &);
+ CharBlock SaveTokenAsName(const CharBlock &);
+ bool IsNameDefined(const CharBlock &);
TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &);
void SkipDisabledConditionalCode(
const std::string &, IsElseActive, Prescanner *, Provenance);
- bool IsIfPredicateTrue(
- const TokenSequence &expr, size_t first, size_t exprTokens, Prescanner *);
+ bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
+ std::size_t exprTokens, Prescanner *);
AllSources *allSources_;
std::list<std::string> names_;
- std::unordered_map<ContiguousChars, Definition> definitions_;
+ std::unordered_map<CharBlock, Definition> definitions_;
std::stack<CanDeadElseAppear> ifStack_;
};
} // namespace parser
diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc
index 2e71a43..131e35d 100644
--- a/flang/lib/parser/prescan.cc
+++ b/flang/lib/parser/prescan.cc
@@ -5,6 +5,7 @@
#include "preprocessor.h"
#include "source.h"
#include "token-sequence.h"
+#include <cstddef>
#include <cstring>
#include <sstream>
#include <utility>
@@ -23,13 +24,15 @@
fixedFormColumnLimit_{that.fixedFormColumnLimit_},
enableOldDebugLines_{that.enableOldDebugLines_},
enableBackslashEscapesInCharLiterals_{
- that.enableBackslashEscapesInCharLiterals_} {}
+ that.enableBackslashEscapesInCharLiterals_},
+ compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
+ compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
bool Prescanner::Prescan(ProvenanceRange range) {
AllSources *allSources{cooked_->allSources()};
ProvenanceRange around{allSources->GetContiguousRangeAround(range)};
startProvenance_ = range.start();
- size_t offset{0};
+ std::size_t offset{0};
const SourceFile *source{
allSources->GetSourceFile(startProvenance_, &offset)};
CHECK(source != nullptr);
@@ -39,7 +42,9 @@
BeginSourceLine(lineStart_);
TokenSequence tokens, preprocessed;
while (lineStart_ < limit_) {
- if (CommentLinesAndPreprocessorDirectives() && lineStart_ >= limit_) {
+ char sentinel[8];
+ if (CommentLinesAndPreprocessorDirectives(sentinel) &&
+ lineStart_ >= limit_) {
break;
}
BeginSourceLineAndAdvance();
@@ -54,14 +59,17 @@
if (preprocessor_->MacroReplacement(tokens, *this, &preprocessed)) {
preprocessed.PutNextTokenChar('\n', newlineProvenance);
preprocessed.CloseToken();
- if (!IsFixedFormCommentLine(preprocessed.data()) &&
- !IsFreeFormComment(preprocessed.data())) {
+ const char *ppd{preprocessed.data()};
+ if (IsFixedFormCompilerDirectiveLine(ppd, sentinel) ||
+ IsFreeFormCompilerDirectiveLine(ppd, sentinel) ||
+ !(IsFixedFormCommentLine(ppd) ||
+ IsFreeFormComment(ppd))) {
preprocessed.pop_back(); // clip the newline added above
- preprocessed.Emit(cooked_);
+ preprocessed.EmitLowerCase(cooked_);
}
preprocessed.clear();
} else {
- tokens.Emit(cooked_);
+ tokens.EmitLowerCase(cooked_);
}
tokens.clear();
cooked_->Put('\n', newlineProvenance);
@@ -205,7 +213,8 @@
if (inFixedForm_) {
SkipSpaces();
} else if (*at_ == ' ' || *at_ == '\t') {
- // Compress white space into a single character.
+ // Compress white space into a single space character.
+ // Discard white space at the end of a line.
const auto theSpace = at_;
NextChar();
SkipSpaces();
@@ -411,7 +420,7 @@
return false;
}
-bool Prescanner::IsFixedFormCommentLine(const char *start) {
+bool Prescanner::IsFixedFormCommentLine(const char *start) const {
if (start >= limit_ || !inFixedForm_) {
return false;
}
@@ -445,7 +454,7 @@
return *p == '\n';
}
-bool Prescanner::IsFreeFormComment(const char *p) {
+bool Prescanner::IsFreeFormComment(const char *p) const {
if (p >= limit_ || inFixedForm_) {
return false;
}
@@ -510,7 +519,11 @@
provenance);
return true;
}
- ProvenanceRange includeLineRange{provenance, static_cast<size_t>(p - start)};
+ if (included->bytes() == 0) {
+ return true;
+ }
+ ProvenanceRange includeLineRange{
+ provenance, static_cast<std::size_t>(p - start)};
ProvenanceRange fileRange{
allSources->AddIncludedFile(*included, includeLineRange)};
anyFatalErrors_ |= !Prescanner{*this}.Prescan(fileRange);
@@ -538,20 +551,28 @@
bool Prescanner::CommentLines() {
bool any{false};
+ char sentinel[8];
while (lineStart_ < limit_) {
- if (IsFixedFormCommentLine(lineStart_) || IsFreeFormComment(lineStart_)) {
- NextLine();
- any = true;
- } else {
+ if (IsFixedFormCompilerDirectiveLine(lineStart_, sentinel) ||
+ IsFreeFormCompilerDirectiveLine(lineStart_, sentinel) ||
+ !(IsFixedFormCommentLine(lineStart_) ||
+ IsFreeFormComment(lineStart_))) {
break;
}
+ NextLine();
+ any = true;
}
return any;
}
-bool Prescanner::CommentLinesAndPreprocessorDirectives() {
+bool Prescanner::CommentLinesAndPreprocessorDirectives(char *sentinel) {
bool any{false};
+ *sentinel = '\0';
while (lineStart_ < limit_) {
+ if (IsFixedFormCompilerDirectiveLine(lineStart_, sentinel) ||
+ IsFreeFormCompilerDirectiveLine(lineStart_, sentinel)) {
+ break;
+ }
if (IsFixedFormCommentLine(lineStart_) || IsFreeFormComment(lineStart_) ||
IncludeLine(lineStart_)) {
NextLine();
@@ -651,5 +672,86 @@
NextLine();
return true;
}
+
+bool Prescanner::IsFixedFormCompilerDirectiveLine(const char *start,
+    char *sentinel) const {  // on success sentinel holds the lower-cased text
+  *sentinel = '\0';
+  if (start >= limit_ || !inFixedForm_) {
+    return false;
+  }
+  const char *p{start};
+  char c1{*p};
+  if (!(c1 == '*' || c1 == 'C' || c1 == 'c' || c1 == '!')) {
+    return false;  // column 1 does not hold a comment character
+  }
+  char *sp{sentinel};
+  ++p;  // step to column 2, the first possible sentinel character
+  for (int col{2}; col < 6; ++col) {
+    char ch{*p++};  // post-increment: reads columns 2-5, ends on column 6
+    if (ch == '\n' || ch == '\t') {
+      return false;
+    }
+    if (ch != ' ') {
+      *sp++ = ToLowerCaseLetter(ch);  // blanks inside columns 2-5 are dropped
+    }
+  }
+  if (*p != ' ' && *p != '0') {  // p now addresses column 6
+    return false; // continuation card for directive
+  }
+  *sp = '\0';
+  return IsCompilerDirectiveSentinel(sentinel);
+}
+
+bool Prescanner::IsFreeFormCompilerDirectiveLine(const char *start,
+ char *sentinel) const { // sentinel receives the lower-cased text after '!'
+ *sentinel = '\0';
+ if (start >= limit_ || inFixedForm_) {
+ return false;
+ }
+ const char *p{start};
+ while (*p == ' ' || *p == '\t') { // skip leading blanks and tabs
+ ++p;
+ }
+ if (*p++ != '!') {
+ return false;
+ }
+ for (int j{0}; j < 5; ++p, ++j) { // examines at most five characters
+ if (*p == '\n' || *p == '&') {
+ break;
+ }
+ if (*p == ' ' || *p == '\t') {
+ if (j == 0) {
+ break; // blank or tab right after '!': nothing was collected
+ }
+ sentinel[j] = '\0'; // the first blank or tab ends the candidate
+ return IsCompilerDirectiveSentinel(sentinel);
+ }
+ sentinel[j] = ToLowerCaseLetter(*p);
+ }
+ return false; // NOTE: sentinel may be left without a terminator here
+}
+
+Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
+  std::uint64_t packed{0};  // lower-cased bytes of dir, eight bits apiece
+  std::string lowered{dir};  // stored key must match the lower-cased lookups
+  for (char &ch : lowered) {
+    packed = (packed << 8) | ((ch = ToLowerCaseLetter(ch)) & 0xff);
+  }
+  compilerDirectiveBloomFilter_.set(packed % prime1).set(packed % prime2);
+  compilerDirectiveSentinels_.insert(lowered);
+  return *this;  // returned so the call can be chained with other setters
+}
+
+bool Prescanner::IsCompilerDirectiveSentinel(const char *s) const {
+ std::uint64_t packed{0}; // bytes of s, eight bits apiece
+ std::size_t n{0}; // length of s, counted by the loop below
+ for (; s[n] != '\0'; ++n) {
+ packed = (packed << 8) | (s[n] & 0xff);
+ }
+ return n > 0 && compilerDirectiveBloomFilter_.test(packed % prime1) &&
+ compilerDirectiveBloomFilter_.test(packed % prime2) && // filter bits first
+ compilerDirectiveSentinels_.find(std::string(s, n)) !=
+ compilerDirectiveSentinels_.end(); // set probed only if both bits are set
+}
} // namespace parser
} // namespace Fortran
diff --git a/flang/lib/parser/prescan.h b/flang/lib/parser/prescan.h
index 5097632..2b3175d 100644
--- a/flang/lib/parser/prescan.h
+++ b/flang/lib/parser/prescan.h
@@ -12,8 +12,10 @@
#include "message.h"
#include "provenance.h"
#include "token-sequence.h"
+#include <bitset>
#include <optional>
#include <string>
+#include <unordered_set>
namespace Fortran {
namespace parser {
@@ -51,6 +53,8 @@
return *this;
}
+ Prescanner &AddCompilerDirectiveSentinel(const std::string &);
+
bool Prescan(ProvenanceRange);
void NextLine();
@@ -110,14 +114,17 @@
void Hollerith(TokenSequence *, int);
bool PadOutCharacterLiteral(TokenSequence *);
bool CommentLines();
- bool CommentLinesAndPreprocessorDirectives();
- bool IsFixedFormCommentLine(const char *);
- bool IsFreeFormComment(const char *);
+ bool CommentLinesAndPreprocessorDirectives(char *sentinel);
+ bool IsFixedFormCommentLine(const char *) const;
+ bool IsFreeFormComment(const char *) const;
bool IncludeLine(const char *);
bool IsPreprocessorDirectiveLine(const char *) const;
const char *FixedFormContinuationLine();
bool FixedFormContinuation();
bool FreeFormContinuation();
+ bool IsFixedFormCompilerDirectiveLine(const char *, char *sentinel) const;
+ bool IsFreeFormCompilerDirectiveLine(const char *, char *sentinel) const;
+ bool IsCompilerDirectiveSentinel(const char *) const;
Messages *messages_;
CookedSource *cooked_;
@@ -146,6 +153,12 @@
cooked_->allSources()->CompilerInsertionProvenance('\\')};
ProvenanceRange sixSpaceProvenance_{
cooked_->allSources()->AddCompilerInsertion(" "s)};
+
+ // To avoid probing the set of active compiler directive sentinel strings
+ // on every comment line, they're checked first with a cheap Bloom filter.
+ static const int prime1{1019}, prime2{1021};
+ std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
+ std::unordered_set<std::string> compilerDirectiveSentinels_;
};
} // namespace parser
} // namespace Fortran
diff --git a/flang/lib/parser/provenance.cc b/flang/lib/parser/provenance.cc
index 0aef7a5..3f47eee 100644
--- a/flang/lib/parser/provenance.cc
+++ b/flang/lib/parser/provenance.cc
@@ -7,7 +7,7 @@
void OffsetToProvenanceMappings::clear() { provenanceMap_.clear(); }
-size_t OffsetToProvenanceMappings::size() const {
+std::size_t OffsetToProvenanceMappings::size() const {
if (provenanceMap_.empty()) {
return 0;
}
@@ -32,11 +32,11 @@
}
}
-ProvenanceRange OffsetToProvenanceMappings::Map(size_t at) const {
+ProvenanceRange OffsetToProvenanceMappings::Map(std::size_t at) const {
CHECK(!provenanceMap_.empty());
- size_t low{0}, count{provenanceMap_.size()};
+ std::size_t low{0}, count{provenanceMap_.size()};
while (count > 1) {
- size_t mid{low + (count >> 1)};
+ std::size_t mid{low + (count >> 1)};
if (provenanceMap_[mid].start > at) {
count = mid - low;
} else {
@@ -44,15 +44,15 @@
low = mid;
}
}
- size_t offset{at - provenanceMap_[low].start};
+ std::size_t offset{at - provenanceMap_[low].start};
return provenanceMap_[low].range.Suffix(offset);
}
-void OffsetToProvenanceMappings::RemoveLastBytes(size_t bytes) {
+void OffsetToProvenanceMappings::RemoveLastBytes(std::size_t bytes) {
for (; bytes > 0; provenanceMap_.pop_back()) {
CHECK(!provenanceMap_.empty());
ContiguousProvenanceMapping &last{provenanceMap_.back()};
- size_t chunk{last.range.size()};
+ std::size_t chunk{last.range.size()};
if (bytes < chunk) {
last.range = last.range.Prefix(chunk - bytes);
break;
@@ -128,7 +128,7 @@
std::visit(
visitors{
[&](const Inclusion &inc) {
- size_t offset{origin.covers.MemberOffset(at)};
+ std::size_t offset{origin.covers.MemberOffset(at)};
std::pair<int, int> pos{inc.source.FindOffsetLineAndColumn(offset)};
o << prefix << "at line " << pos.first << ", column " << pos.second;
if (echoSourceLine) {
@@ -165,7 +165,7 @@
o << prefix << "and expanded to\n"
<< indented << " " << mac.expansion << '\n'
<< indented << " ";
- for (size_t j{0}; origin.covers.OffsetMember(j) < at; ++j) {
+ for (std::size_t j{0}; origin.covers.OffsetMember(j) < at; ++j) {
o << (mac.expansion[j] == '\t' ? '\t' : ' ');
}
o << "^\n";
@@ -178,7 +178,7 @@
}
const SourceFile *AllSources::GetSourceFile(
- Provenance at, size_t *offset) const {
+ Provenance at, std::size_t *offset) const {
const Origin &origin{MapToOrigin(at)};
return std::visit(visitors{[&](const Inclusion &inc) {
if (offset != nullptr) {
@@ -212,7 +212,7 @@
}
int AllSources::GetLineNumber(Provenance at) const {
- size_t offset{0};
+ std::size_t offset{0};
const SourceFile *source{GetSourceFile(at, &offset)};
return source ? source->FindOffsetLineAndColumn(offset).first : 0;
}
@@ -239,7 +239,7 @@
AllSources::Origin::Origin(ProvenanceRange r, const std::string &text)
: u{CompilerInsertion{text}}, covers{r} {}
-const char &AllSources::Origin::operator[](size_t n) const {
+const char &AllSources::Origin::operator[](std::size_t n) const {
return std::visit(
visitors{[n](const Inclusion &inc) -> const char & {
return inc.source.content()[n];
@@ -253,9 +253,9 @@
const AllSources::Origin &AllSources::MapToOrigin(Provenance at) const {
CHECK(range_.Contains(at));
- size_t low{0}, count{origin_.size()};
+ std::size_t low{0}, count{origin_.size()};
while (count > 1) {
- size_t mid{low + (count >> 1)};
+ std::size_t mid{low + (count >> 1)};
if (at < origin_[mid].covers.start()) {
count = mid - low;
} else {
@@ -290,7 +290,7 @@
void OffsetToProvenanceMappings::Dump(std::ostream &o) const {
for (const ContiguousProvenanceMapping &m : provenanceMap_) {
- size_t n{m.range.size()};
+ std::size_t n{m.range.size()};
o << "offsets [" << m.start << ".." << (m.start + n - 1)
<< "] -> provenances ";
DumpRange(o, m.range);
diff --git a/flang/lib/parser/provenance.h b/flang/lib/parser/provenance.h
index 92a19f1..afbb97f 100644
--- a/flang/lib/parser/provenance.h
+++ b/flang/lib/parser/provenance.h
@@ -3,7 +3,9 @@
#include "char-buffer.h"
#include "idioms.h"
+#include "interval.h"
#include "source.h"
+#include <cstddef>
#include <map>
#include <memory>
#include <ostream>
@@ -37,20 +39,20 @@
class Provenance {
public:
Provenance() {}
- Provenance(size_t offset) : offset_{offset} { CHECK(offset > 0); }
+ Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); }
Provenance(const Provenance &that) = default;
Provenance(Provenance &&that) = default;
Provenance &operator=(const Provenance &that) = default;
Provenance &operator=(Provenance &&that) = default;
- size_t offset() const { return offset_; }
+ std::size_t offset() const { return offset_; }
Provenance operator+(ptrdiff_t n) const {
CHECK(n > -static_cast<ptrdiff_t>(offset_));
- return {offset_ + static_cast<size_t>(n)};
+ return {offset_ + static_cast<std::size_t>(n)};
}
- Provenance operator+(size_t n) const { return {offset_ + n}; }
- size_t operator-(Provenance that) const {
+ Provenance operator+(std::size_t n) const { return {offset_ + n}; }
+ std::size_t operator-(Provenance that) const {
CHECK(that <= *this);
return offset_ - that.offset_;
}
@@ -60,63 +62,7 @@
bool operator!=(Provenance that) const { return !(*this == that); }
private:
- size_t offset_{0};
-};
-
-template<typename A> class Interval {
-public:
- using type = A;
- Interval() {}
- Interval(const A &s, size_t n) : start_{s}, size_{n} {}
- Interval(A &&s, size_t n) : start_{std::move(s)}, size_{n} {}
- Interval(const Interval &) = default;
- Interval(Interval &&) = default;
- Interval &operator=(const Interval &) = default;
- Interval &operator=(Interval &&) = default;
-
- bool operator==(const Interval &that) const {
- return start_ == that.start_ && size_ == that.size_;
- }
-
- const A &start() const { return start_; }
- size_t size() const { return size_; }
- bool empty() const { return size_ == 0; }
-
- bool Contains(const A &x) const { return start_ <= x && x < start_ + size_; }
- bool Contains(const Interval &that) const {
- return Contains(that.start_) && Contains(that.start_ + (that.size_ - 1));
- }
- bool ImmediatelyPrecedes(const Interval &that) const {
- return NextAfter() == that.start_;
- }
- bool AnnexIfPredecessor(const Interval &that) {
- if (ImmediatelyPrecedes(that)) {
- size_ += that.size_;
- return true;
- }
- return false;
- }
-
- size_t MemberOffset(const A &x) const {
- CHECK(Contains(x));
- return x - start_;
- }
- A OffsetMember(size_t n) const {
- CHECK(n < size_);
- return start_ + n;
- }
-
- A Last() const { return start_ + (size_ - 1); }
- A NextAfter() const { return start_ + size_; }
- Interval Prefix(size_t n) const { return {start_, std::min(size_, n)}; }
- Interval Suffix(size_t n) const {
- CHECK(n <= size_);
- return {start_ + n, size_ - n};
- }
-
-private:
- A start_;
- size_t size_{0};
+ std::size_t offset_{0};
};
using ProvenanceRange = Interval<Provenance>;
@@ -128,18 +74,18 @@
class OffsetToProvenanceMappings {
public:
OffsetToProvenanceMappings() {}
- size_t size() const;
+ std::size_t size() const;
void clear();
void shrink_to_fit() { provenanceMap_.shrink_to_fit(); }
void Put(ProvenanceRange);
void Put(const OffsetToProvenanceMappings &);
- ProvenanceRange Map(size_t at) const;
- void RemoveLastBytes(size_t);
+ ProvenanceRange Map(std::size_t at) const;
+ void RemoveLastBytes(std::size_t);
void Dump(std::ostream &) const;
private:
struct ContiguousProvenanceMapping {
- size_t start;
+ std::size_t start;
ProvenanceRange range;
};
@@ -151,7 +97,7 @@
AllSources();
~AllSources();
- size_t size() const { return range_.size(); }
+ std::size_t size() const { return range_.size(); }
const char &operator[](Provenance) const;
void PushSearchPathDirectory(std::string);
@@ -170,12 +116,13 @@
}
void Identify(std::ostream &, Provenance, const std::string &prefix,
bool echoSourceLine = false) const;
- const SourceFile *GetSourceFile(Provenance, size_t *offset = nullptr) const;
+ const SourceFile *GetSourceFile(
+ Provenance, std::size_t *offset = nullptr) const;
ProvenanceRange GetContiguousRangeAround(ProvenanceRange) const;
std::string GetPath(Provenance) const; // __FILE__
int GetLineNumber(Provenance) const; // __LINE__
Provenance CompilerInsertionProvenance(char ch);
- Provenance CompilerInsertionProvenance(const char *, size_t);
+ Provenance CompilerInsertionProvenance(const char *, std::size_t);
void Dump(std::ostream &) const;
private:
@@ -202,7 +149,7 @@
const std::string &expansion);
Origin(ProvenanceRange, const std::string &);
- const char &operator[](size_t) const;
+ const char &operator[](std::size_t) const;
std::variant<Inclusion, Macro, CompilerInsertion> u;
ProvenanceRange covers, replaces;
@@ -221,16 +168,16 @@
public:
explicit CookedSource(AllSources *sources) : allSources_{sources} {}
- size_t size() const { return data_.size(); }
- const char &operator[](size_t n) const { return data_[n]; }
- const char &at(size_t n) const { return data_.at(n); }
+ std::size_t size() const { return data_.size(); }
+ const char &operator[](std::size_t n) const { return data_[n]; }
+ const char &at(std::size_t n) const { return data_.at(n); }
AllSources *allSources() const { return allSources_; }
ProvenanceRange GetProvenance(const char *) const;
void Identify(std::ostream &, const char *) const;
- void Put(const char *data, size_t bytes) { buffer_.Put(data, bytes); }
+ void Put(const char *data, std::size_t bytes) { buffer_.Put(data, bytes); }
void Put(char ch) { buffer_.Put(&ch, 1); }
void Put(char ch, Provenance p) {
buffer_.Put(&ch, 1);
diff --git a/flang/lib/parser/source.cc b/flang/lib/parser/source.cc
index 6970736..e41ca68 100644
--- a/flang/lib/parser/source.cc
+++ b/flang/lib/parser/source.cc
@@ -3,6 +3,7 @@
#include "idioms.h"
#include <algorithm>
#include <cerrno>
+#include <cstddef>
#include <cstring>
#include <fcntl.h>
#include <memory>
@@ -19,13 +20,14 @@
SourceFile::~SourceFile() { Close(); }
-static std::vector<size_t> FindLineStarts(const char *source, size_t bytes) {
+static std::vector<std::size_t> FindLineStarts(
+ const char *source, std::size_t bytes) {
if (bytes == 0) {
return {};
}
CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
- std::vector<size_t> result;
- size_t at{0};
+ std::vector<std::size_t> result;
+ std::size_t at{0};
do {
result.push_back(at);
const void *vp{static_cast<const void *>(&source[at])};
@@ -87,7 +89,7 @@
// Try to map the source file into the process' address space.
if (S_ISREG(statbuf.st_mode)) {
- bytes_ = static_cast<size_t>(statbuf.st_size);
+ bytes_ = static_cast<std::size_t>(statbuf.st_size);
if (bytes_ > 0) {
void *vp = mmap(0, bytes_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
if (vp != MAP_FAILED) {
@@ -110,7 +112,7 @@
// contiguous block.
CharBuffer buffer;
while (true) {
- size_t count;
+ std::size_t count;
char *to{buffer.FreeSpace(&count)};
ssize_t got{read(fileDescriptor_, to, count)};
if (got < 0) {
@@ -164,14 +166,14 @@
path_.clear();
}
-std::pair<int, int> SourceFile::FindOffsetLineAndColumn(size_t at) const {
+std::pair<int, int> SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
CHECK(at < bytes_);
if (lineStart_.empty()) {
return {1, static_cast<int>(at + 1)};
}
- size_t low{0}, count{lineStart_.size()};
+ std::size_t low{0}, count{lineStart_.size()};
while (count > 1) {
- size_t mid{low + (count >> 1)};
+ std::size_t mid{low + (count >> 1)};
if (lineStart_[mid] > at) {
count = mid - low;
} else {
diff --git a/flang/lib/parser/source.h b/flang/lib/parser/source.h
index e6f5b7b..9ee176a 100644
--- a/flang/lib/parser/source.h
+++ b/flang/lib/parser/source.h
@@ -5,6 +5,7 @@
// - Line ending markers are converted to single newline characters
// - A newline character is added to the last line of the file if one is needed
+#include <cstddef>
#include <sstream>
#include <string>
#include <utility>
@@ -23,13 +24,13 @@
~SourceFile();
std::string path() const { return path_; }
const char *content() const { return content_; }
- size_t bytes() const { return bytes_; }
- size_t lines() const { return lineStart_.size(); }
+ std::size_t bytes() const { return bytes_; }
+ std::size_t lines() const { return lineStart_.size(); }
bool Open(std::string path, std::stringstream *error);
void Close();
- std::pair<int, int> FindOffsetLineAndColumn(size_t) const;
- size_t GetLineStartOffset(int lineNumber) const {
+ std::pair<int, int> FindOffsetLineAndColumn(std::size_t) const;
+ std::size_t GetLineStartOffset(int lineNumber) const {
return lineStart_.at(lineNumber - 1);
}
@@ -38,8 +39,8 @@
int fileDescriptor_{-1};
bool isMemoryMapped_{false};
const char *content_{nullptr};
- size_t bytes_{0};
- std::vector<size_t> lineStart_;
+ std::size_t bytes_{0};
+ std::vector<std::size_t> lineStart_;
};
} // namespace parser
} // namespace Fortran
diff --git a/flang/lib/parser/token-parsers.h b/flang/lib/parser/token-parsers.h
index d7f422b2..e1a6f0e 100644
--- a/flang/lib/parser/token-parsers.h
+++ b/flang/lib/parser/token-parsers.h
@@ -8,6 +8,7 @@
#include "characters.h"
#include "idioms.h"
#include "provenance.h"
+#include <cstddef>
#include <cstring>
#include <functional>
#include <limits>
@@ -18,59 +19,73 @@
namespace Fortran {
namespace parser {
-class CharPredicateGuardParser {
+class CharPredicateGuard {
public:
- using resultType = char;
- constexpr CharPredicateGuardParser(
- const CharPredicateGuardParser &) = default;
- constexpr CharPredicateGuardParser(bool (*f)(char), MessageFixedText t)
- : predicate_{f}, text_{t} {}
- std::optional<char> Parse(ParseState *state) const {
- auto at = state->GetLocation();
- if (std::optional<char> result{nextChar.Parse(state)}) {
- if (predicate_(*result)) {
- return result;
+ using resultType = const char *;
+ constexpr CharPredicateGuard(const CharPredicateGuard &) = default;
+ constexpr CharPredicateGuard(bool (*f)(char), MessageFixedText m)
+ : predicate_{f}, messageText_{m} {}
+ std::optional<const char *> Parse(ParseState *state) const {
+ const char *at{state->GetLocation()};
+ if (!state->IsAtEnd()) {
+ if (predicate_(*at)) {
+ state->UncheckedAdvance();
+ return {at};
}
}
- state->PutMessage(at, text_);
+ state->PutMessage(at, messageText_);
return {};
}
private:
bool (*const predicate_)(char);
- const MessageFixedText text_;
+ const MessageFixedText messageText_;
};
-constexpr CharPredicateGuardParser digit{
- IsDecimalDigit, "expected digit"_en_US};
+constexpr auto letter = CharPredicateGuard{IsLetter, "expected letter"_en_US};
+constexpr auto digit =
+ CharPredicateGuard{IsDecimalDigit, "expected digit"_en_US};
-constexpr auto letter = applyFunction(ToLowerCaseLetter,
- CharPredicateGuardParser{IsLetter, "expected letter"_en_US});
-
-template<char good> class CharMatch {
+// "xyz"_ch matches one instance of any one of the characters x, y, or z
+// without skipping any spaces before or after. The parser returns the
+// location of the matched character on success.
+class AnyOfChar {
public:
- using resultType = char;
- constexpr CharMatch() {}
- static std::optional<char> Parse(ParseState *state) {
- auto at = state->GetLocation();
- std::optional<char> result{nextChar.Parse(state)};
- if (result && *result != good) {
- result.reset();
+ using resultType = const char *;
+ constexpr AnyOfChar(const AnyOfChar &) = default;
+ constexpr AnyOfChar(const char *chars, std::size_t n)
+ : chars_{chars}, bytes_{n} {}
+ std::optional<const char *> Parse(ParseState *state) const {
+ const char *at{state->GetLocation()};
+ if (!state->IsAtEnd()) {
+ const char *p{chars_};
+ for (std::size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
+ if (*at == *p) {
+ state->UncheckedAdvance();
+ return {at};
+ }
+ }
}
- if (!result) {
- state->PutMessage(at, MessageExpectedText{good});
- }
- return {result};
+ state->PutMessage(at, MessageExpectedText{chars_, bytes_});
+ return {};
}
+
+private:
+ const char *const chars_;
+ const std::size_t bytes_{std::numeric_limits<std::size_t>::max()};
};
+constexpr AnyOfChar operator""_ch(const char str[], std::size_t n) {
+ return AnyOfChar{str, n};
+}
+
// Skips over spaces. Always succeeds.
constexpr struct Spaces {
using resultType = Success;
constexpr Spaces() {}
static std::optional<Success> Parse(ParseState *state) {
while (std::optional<char> ch{state->PeekAtNextChar()}) {
- if (ch != ' ' && ch != '\t') {
+ if (ch != ' ') {
break;
}
state->UncheckedAdvance();
@@ -83,37 +98,41 @@
public:
using resultType = Success;
constexpr TokenStringMatch(const TokenStringMatch &) = default;
- constexpr TokenStringMatch(const char *str, size_t n)
+ constexpr TokenStringMatch(const char *str, std::size_t n)
: str_{str}, bytes_{n} {}
constexpr TokenStringMatch(const char *str) : str_{str} {}
std::optional<Success> Parse(ParseState *state) const {
- auto at = state->GetLocation();
spaces.Parse(state);
+ const char *start{state->GetLocation()};
const char *p{str_};
- std::optional<char> ch; // initially empty
- for (size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
+ std::optional<const char *> at; // initially empty
+ for (std::size_t j{0}; j < bytes_ && *p != '\0'; ++j, ++p) {
const auto spaceSkipping{*p == ' '};
if (spaceSkipping) {
if (j + 1 == bytes_ || p[1] == ' ' || p[1] == '\0') {
continue; // redundant; ignore
}
}
- if (!ch.has_value() && !(ch = nextChar.Parse(state))) {
- return {};
+ if (!at.has_value()) {
+ at = nextCh.Parse(state);
+ if (!at.has_value()) {
+ return {};
+ }
}
if (spaceSkipping) {
- // medial space: 0 or more spaces/tabs accepted, none required
+ // medial space: space accepted, none required
// TODO: designate and enforce free-form mandatory white space
- while (*ch == ' ' || *ch == '\t') {
- if (!(ch = nextChar.Parse(state))) {
+ if (**at == ' ') {
+ at = nextCh.Parse(state);
+ if (!at.has_value()) {
return {};
}
}
- // ch remains full for next iteration
- } else if (IsSameApartFromCase(*ch, *p)) {
- ch.reset();
+ // 'at' remains full for next iteration
+ } else if (**at == ToLowerCaseLetter(*p)) {
+ at.reset();
} else {
- state->PutMessage(at, MessageExpectedText{str_, bytes_});
+ state->PutMessage(start, MessageExpectedText{str_, bytes_});
return {};
}
}
@@ -122,10 +141,10 @@
private:
const char *const str_;
- const size_t bytes_{std::numeric_limits<size_t>::max()};
+ const std::size_t bytes_{std::numeric_limits<std::size_t>::max()};
};
-constexpr TokenStringMatch operator""_tok(const char str[], size_t n) {
+constexpr TokenStringMatch operator""_tok(const char str[], std::size_t n) {
return TokenStringMatch{str, n};
}
@@ -167,11 +186,11 @@
using resultType = Result;
static std::optional<Result> Parse(ParseState *state) {
auto at = state->GetLocation();
- std::optional<char> och{nextChar.Parse(state)};
+ std::optional<const char *> och{nextCh.Parse(state)};
if (!och.has_value()) {
return {};
}
- char ch{*och};
+ char ch{**och};
if (ch == '\n') {
state->PutMessage(at, "unclosed character constant"_en_US);
return {};
@@ -179,10 +198,10 @@
if (ch != '\\') {
return {Result::Bare(ch)};
}
- if (!(och = nextChar.Parse(state)).has_value()) {
+ if (!(och = nextCh.Parse(state)).has_value()) {
return {};
}
- ch = *och;
+ ch = **och;
if (ch == '\n') {
state->PutMessage(at, "unclosed character constant"_en_US);
return {};
@@ -193,21 +212,25 @@
if (IsOctalDigit(ch)) {
ch -= '0';
for (int j = (ch > 3 ? 1 : 2); j-- > 0;) {
- static constexpr auto octalDigit = attempt(CharPredicateGuardParser{
- IsOctalDigit, "expected octal digit"_en_US});
+ static constexpr auto octalDigit =
+ CharPredicateGuard{IsOctalDigit, "expected octal digit"_en_US};
och = octalDigit.Parse(state);
if (och.has_value()) {
- ch = 8 * ch + *och - '0';
+ ch = 8 * ch + **och - '0';
+ } else {
+ break;
}
}
} else if (ch == 'x' || ch == 'X') {
ch = 0;
for (int j = 0; j++ < 2;) {
- static constexpr auto hexDigit = attempt(CharPredicateGuardParser{
- IsHexadecimalDigit, "expected hexadecimal digit"_en_US});
+ static constexpr auto hexDigit = CharPredicateGuard{
+ IsHexadecimalDigit, "expected hexadecimal digit"_en_US};
och = hexDigit.Parse(state);
if (och.has_value()) {
- ch = 16 * ch + HexadecimalDigitValue(*och);
+ ch = 16 * ch + HexadecimalDigitValue(**och);
+ } else {
+ break;
}
}
} else {
@@ -222,9 +245,10 @@
static std::optional<std::string> Parse(ParseState *state) {
std::string str;
static constexpr auto nextch = attempt(CharLiteralChar{});
+ static char q{quote};
while (std::optional<CharLiteralChar::Result> ch{nextch.Parse(state)}) {
if (ch->ch == quote && !ch->wasEscaped) {
- static constexpr auto doubled = attempt(CharMatch<quote>{});
+ static constexpr auto doubled = attempt(AnyOfChar{&q, 1});
if (!doubled.Parse(state).has_value()) {
return {str};
}
@@ -254,60 +278,64 @@
static std::optional<std::uint64_t> Parse(ParseState *state) {
std::optional<int> shift;
auto baseChar = [&shift](char ch) -> bool {
- switch (toupper(ch)) {
- case 'B': shift = 1; return true;
- case 'O': shift = 3; return true;
- case 'Z': shift = 4; return true;
- case 'X': shift = 4; return true;
+ switch (ch) {
+ case 'b': shift = 1; return true;
+ case 'o': shift = 3; return true;
+ case 'z': shift = 4; return true;
+ case 'x': shift = 4; return true;
default: return false;
}
};
spaces.Parse(state);
- auto ch = nextChar.Parse(state);
- if (!ch) {
+ const char *start{state->GetLocation()};
+ std::optional<const char *> at{nextCh.Parse(state)};
+ if (!at.has_value()) {
return {};
}
- if (toupper(*ch) == 'X' && !IsNonstandardUsageOk(state)) {
+ if (**at == 'x' && !IsNonstandardUsageOk(state)) {
return {};
}
- if (baseChar(*ch) && !(ch = nextChar.Parse(state))) {
- return {};
+ if (baseChar(**at)) {
+ at = nextCh.Parse(state);
+ if (!at.has_value()) {
+ return {};
+ }
}
- char quote = *ch;
+ char quote = **at;
if (quote != '\'' && quote != '"') {
return {};
}
- auto at = state->GetLocation();
std::string content;
while (true) {
- if (!(ch = nextChar.Parse(state))) {
+ at = nextCh.Parse(state);
+ if (!at.has_value()) {
return {};
}
- if (*ch == quote) {
+ if (**at == quote) {
break;
}
- if (*ch == ' ') {
+ if (**at == ' ') {
continue;
}
- if (!IsHexadecimalDigit(*ch)) {
+ if (!IsHexadecimalDigit(**at)) {
return {};
}
- content += *ch;
+ content += **at;
}
if (!shift) {
// extension: base allowed to appear as suffix, too
- if (!IsNonstandardUsageOk(state) || !(ch = nextChar.Parse(state)) ||
- !baseChar(*ch)) {
+ if (!IsNonstandardUsageOk(state) || !(at = nextCh.Parse(state)) ||
+ !baseChar(**at)) {
return {};
}
}
if (content.empty()) {
- state->PutMessage(at, "no digit in BOZ literal"_en_US);
+ state->PutMessage(start, "no digit in BOZ literal"_en_US);
return {};
}
@@ -315,13 +343,13 @@
for (auto digit : content) {
digit = HexadecimalDigitValue(digit);
if ((digit >> *shift) > 0) {
- state->PutMessage(at, "bad digit in BOZ literal"_en_US);
+ state->PutMessage(start, "bad digit in BOZ literal"_en_US);
return {};
}
std::uint64_t was{value};
value <<= *shift;
if ((value >> *shift) != was) {
- state->PutMessage(at, "excessive digits in BOZ literal"_en_US);
+ state->PutMessage(start, "excessive digits in BOZ literal"_en_US);
return {};
}
value |= digit;
@@ -335,26 +363,25 @@
using resultType = std::uint64_t;
static std::optional<std::uint64_t> Parse(ParseState *state) {
static constexpr auto getDigit = attempt(digit);
- auto at = state->GetLocation();
- std::optional<char> firstDigit{getDigit.Parse(state)};
- if (!firstDigit) {
+ std::optional<const char *> firstDigit{getDigit.Parse(state)};
+ if (!firstDigit.has_value()) {
return {};
}
- std::uint64_t value = *firstDigit - '0';
+ std::uint64_t value = **firstDigit - '0';
bool overflow{false};
while (auto nextDigit{getDigit.Parse(state)}) {
if (value > std::numeric_limits<std::uint64_t>::max() / 10) {
overflow = true;
}
value *= 10;
- int digitValue = *nextDigit - '0';
+ int digitValue = **nextDigit - '0';
if (value > std::numeric_limits<std::uint64_t>::max() - digitValue) {
overflow = true;
}
value += digitValue;
}
if (overflow) {
- state->PutMessage(at, "overflow in decimal literal"_en_US);
+ state->PutMessage(*firstDigit, "overflow in decimal literal"_en_US);
}
return {value};
}
@@ -365,13 +392,13 @@
using resultType = std::string;
static std::optional<std::string> Parse(ParseState *state) {
spaces.Parse(state);
- auto at = state->GetLocation();
+ const char *start{state->GetLocation()};
std::optional<std::uint64_t> charCount{DigitString{}.Parse(state)};
if (!charCount || *charCount < 1) {
return {};
}
- std::optional<char> h{letter.Parse(state)};
- if (!h || (*h != 'h' && *h != 'H')) {
+ std::optional<const char *> h{letter.Parse(state)};
+ if (!h || **h != 'h') {
return {};
}
std::string content;
@@ -381,32 +408,32 @@
if (state->encoding() == Encoding::EUC_JP) {
std::optional<int> chBytes{EUC_JPCharacterBytes(p)};
if (!chBytes.has_value()) {
- state->PutMessage(at, "bad EUC_JP characters in Hollerith"_en_US);
+ state->PutMessage(start, "bad EUC_JP characters in Hollerith"_en_US);
return {};
}
bytes = *chBytes;
} else if (state->encoding() == Encoding::UTF8) {
std::optional<int> chBytes{UTF8CharacterBytes(p)};
if (!chBytes.has_value()) {
- state->PutMessage(at, "bad UTF-8 characters in Hollerith"_en_US);
+ state->PutMessage(start, "bad UTF-8 characters in Hollerith"_en_US);
return {};
}
bytes = *chBytes;
}
if (bytes == 1) {
- std::optional<char> ch{nextChar.Parse(state)};
- if (!ch.has_value() || !isprint(*ch)) {
+ std::optional<const char *> at{nextCh.Parse(state)};
+ if (!at.has_value() || !isprint(**at)) {
state->PutMessage(
- at, "insufficient or bad characters in Hollerith"_en_US);
+ start, "insufficient or bad characters in Hollerith"_en_US);
return {};
}
- content += *ch;
+ content += **at;
} else {
// Multi-byte character
while (bytes-- > 0) {
- std::optional<char> byte{nextChar.Parse(state)};
+ std::optional<const char *> byte{nextCh.Parse(state)};
CHECK(byte.has_value());
- content += *byte;
+ content += **byte;
}
}
}
diff --git a/flang/lib/parser/token-sequence.cc b/flang/lib/parser/token-sequence.cc
index aa2a970..fec6f42 100644
--- a/flang/lib/parser/token-sequence.cc
+++ b/flang/lib/parser/token-sequence.cc
@@ -4,18 +4,6 @@
namespace Fortran {
namespace parser {
-bool ContiguousChars::IsBlank() const {
- const char *data{interval_.start()};
- size_t n{interval_.size()};
- for (size_t j{0}; j < n; ++j) {
- char ch{data[j]};
- if (ch != ' ' && ch != '\t') {
- return false;
- }
- }
- return true;
-}
-
void TokenSequence::clear() {
start_.clear();
nextStart_ = 0;
@@ -24,7 +12,7 @@
}
void TokenSequence::pop_back() {
- size_t bytes{nextStart_ - start_.back()};
+ std::size_t bytes{nextStart_ - start_.back()};
nextStart_ = start_.back();
start_.pop_back();
char_.resize(nextStart_);
@@ -51,22 +39,23 @@
}
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
- size_t offset{0};
- for (size_t j{0}; j < that.size(); ++j) {
- ContiguousChars tok{that[j]};
+ std::size_t offset{0};
+ for (std::size_t j{0}; j < that.size(); ++j) {
+ CharBlock tok{that[j]};
Put(tok, range.OffsetMember(offset));
offset += tok.size();
}
CHECK(offset == range.size());
}
-void TokenSequence::Put(const TokenSequence &that, size_t at, size_t tokens) {
+void TokenSequence::Put(
+ const TokenSequence &that, std::size_t at, std::size_t tokens) {
ProvenanceRange provenance;
- size_t offset{0};
+ std::size_t offset{0};
for (; tokens-- > 0; ++at) {
- ContiguousChars tok{that[at]};
- size_t tokBytes{tok.size()};
- for (size_t j{0}; j < tokBytes; ++j) {
+ CharBlock tok{that[at]};
+ std::size_t tokBytes{tok.size()};
+ for (std::size_t j{0}; j < tokBytes; ++j) {
if (offset == provenance.size()) {
offset = 0;
provenance = that.provenances_.Map(that.start_[at] + j);
@@ -77,14 +66,15 @@
}
}
-void TokenSequence::Put(const char *s, size_t bytes, Provenance provenance) {
- for (size_t j{0}; j < bytes; ++j) {
+void TokenSequence::Put(
+ const char *s, std::size_t bytes, Provenance provenance) {
+ for (std::size_t j{0}; j < bytes; ++j) {
PutNextTokenChar(s[j], provenance + j);
}
CloseToken();
}
-void TokenSequence::Put(const ContiguousChars &t, Provenance provenance) {
+void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
Put(&t[0], t.size(), provenance);
}
@@ -96,14 +86,49 @@
Put(ss.str(), provenance);
}
-void TokenSequence::Emit(CookedSource *cooked) const {
- size_t tokens{start_.size()};
- size_t chars{char_.size()};
- size_t atToken{0};
- for (size_t j{0}; j < chars;) {
- size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
- cooked->Put(&char_[j], nextStart - j);
+void TokenSequence::EmitLowerCase(CookedSource *cooked) const {
+ std::size_t tokens{start_.size()};
+ std::size_t chars{char_.size()};
+ std::size_t atToken{0};
+ for (std::size_t j{0}; j < chars;) {
+ std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
+ const char *p{&char_[j]}, *limit{&char_[nextStart]};
j = nextStart;
+ if (IsDecimalDigit(*p)) {
+ while (p < limit && IsDecimalDigit(*p)) {
+ cooked->Put(*p++);
+ }
+ if (p < limit && (*p == 'h' || *p == 'H')) {
+ // Hollerith
+ cooked->Put('h');
+ cooked->Put(p + 1, limit - (p + 1));
+ } else {
+ // exponent
+ while (p < limit) {
+ cooked->Put(ToLowerCaseLetter(*p++));
+ }
+ }
+ } else if (limit[-1] == '\'' || limit[-1] == '"') {
+ if (*p == limit[-1]) {
+ // Character literal without prefix
+ cooked->Put(p, limit - p);
+ } else if (p[1] == limit[-1]) {
+ // BOZX-prefixed constant
+ while (p < limit) {
+ cooked->Put(ToLowerCaseLetter(*p++));
+ }
+ } else {
+ // Kanji NC'...' character literal or literal with kind-param prefix.
+ while (*p != limit[-1]) {
+ cooked->Put(ToLowerCaseLetter(*p++));
+ }
+ cooked->Put(p, limit - p);
+ }
+ } else {
+ while (p < limit) {
+ cooked->Put(ToLowerCaseLetter(*p++));
+ }
+ }
}
cooked->PutProvenanceMappings(provenances_);
}
@@ -113,19 +138,19 @@
}
Provenance TokenSequence::GetTokenProvenance(
- size_t token, size_t offset) const {
+ std::size_t token, std::size_t offset) const {
ProvenanceRange range{provenances_.Map(start_[token] + offset)};
return range.start();
}
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
- size_t token, size_t offset) const {
+ std::size_t token, std::size_t offset) const {
ProvenanceRange range{provenances_.Map(start_[token] + offset)};
return range.Prefix(TokenBytes(token) - offset);
}
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
- size_t token, size_t tokens) const {
+ std::size_t token, std::size_t tokens) const {
if (tokens == 0) {
return {};
}
diff --git a/flang/lib/parser/token-sequence.h b/flang/lib/parser/token-sequence.h
index 4e39f96..7053bf6 100644
--- a/flang/lib/parser/token-sequence.h
+++ b/flang/lib/parser/token-sequence.h
@@ -2,9 +2,12 @@
#define FORTRAN_PARSER_TOKEN_SEQUENCE_H_
// A buffer class capable of holding a contiguous sequence of characters
-// that have been partitioned into preprocessing tokens.
+// that has been partitioned into preprocessing tokens, along with their
+// associated provenances.
+#include "char-block.h"
#include "provenance.h"
+#include <cstddef>
#include <cstring>
#include <string>
#include <utility>
@@ -13,62 +16,14 @@
namespace Fortran {
namespace parser {
-// Just a const char pointer with an associated length; does not presume
-// to own the referenced data. Used to describe buffered tokens and hash
-// table keys.
-class ContiguousChars {
-public:
- ContiguousChars() {}
- ContiguousChars(const char *x, size_t n) : interval_{x, n} {}
- ContiguousChars(const std::string &s) : interval_{s.data(), s.size()} {}
- ContiguousChars(const ContiguousChars &that) = default;
- ContiguousChars &operator=(const ContiguousChars &that) = default;
-
- bool empty() const { return interval_.empty(); }
- size_t size() const { return interval_.size(); }
- const char &operator[](size_t j) const { return interval_.start()[j]; }
-
- bool IsBlank() const;
- std::string ToString() const {
- return std::string{interval_.start(), interval_.size()};
- }
-
-private:
- Interval<const char *> interval_{nullptr, 0};
-};
-} // namespace parser
-} // namespace Fortran
-
-// Specializations to enable std::unordered_map<ContiguousChars, ...>
-template<> struct std::hash<Fortran::parser::ContiguousChars> {
- size_t operator()(const Fortran::parser::ContiguousChars &x) const {
- size_t hash{0}, bytes{x.size()};
- for (size_t j{0}; j < bytes; ++j) {
- hash = (hash * 31) ^ x[j];
- }
- return hash;
- }
-};
-
-template<> struct std::equal_to<Fortran::parser::ContiguousChars> {
- bool operator()(const Fortran::parser::ContiguousChars &x,
- const Fortran::parser::ContiguousChars &y) const {
- return x.size() == y.size() &&
- std::memcmp(static_cast<const void *>(&x[0]),
- static_cast<const void *>(&y[0]), x.size()) == 0;
- }
-};
-
-namespace Fortran {
-namespace parser {
-
// Buffers a contiguous sequence of characters that has been partitioned into
// a sequence of preprocessing tokens with provenances.
class TokenSequence {
public:
TokenSequence() {}
TokenSequence(const TokenSequence &that) { Put(that); }
- TokenSequence(const TokenSequence &that, size_t at, size_t count = 1) {
+ TokenSequence(
+ const TokenSequence &that, std::size_t at, std::size_t count = 1) {
Put(that, at, count);
}
TokenSequence(TokenSequence &&that)
@@ -89,12 +44,12 @@
return *this;
}
- ContiguousChars operator[](size_t token) const {
+ CharBlock operator[](std::size_t token) const {
return {&char_[start_[token]], TokenBytes(token)};
}
bool empty() const { return start_.empty(); }
- size_t size() const { return start_.size(); }
+ std::size_t size() const { return start_.size(); }
const char *data() const { return &char_[0]; }
void clear();
void pop_back();
@@ -117,28 +72,30 @@
void Put(const TokenSequence &);
void Put(const TokenSequence &, ProvenanceRange);
- void Put(const TokenSequence &, size_t at, size_t tokens = 1);
- void Put(const char *, size_t, Provenance);
- void Put(const ContiguousChars &, Provenance);
+ void Put(const TokenSequence &, std::size_t at, std::size_t tokens = 1);
+ void Put(const char *, std::size_t, Provenance);
+ void Put(const CharBlock &, Provenance);
void Put(const std::string &, Provenance);
void Put(const std::stringstream &, Provenance);
- void Emit(CookedSource *) const;
std::string ToString() const;
- Provenance GetTokenProvenance(size_t token, size_t offset = 0) const;
+ Provenance GetTokenProvenance(
+ std::size_t token, std::size_t offset = 0) const;
ProvenanceRange GetTokenProvenanceRange(
- size_t token, size_t offset = 0) const;
+ std::size_t token, std::size_t offset = 0) const;
ProvenanceRange GetIntervalProvenanceRange(
- size_t token, size_t tokens = 1) const;
+ std::size_t token, std::size_t tokens = 1) const;
ProvenanceRange GetProvenanceRange() const;
+ void EmitLowerCase(CookedSource *) const;
+
private:
- size_t TokenBytes(size_t token) const {
+ std::size_t TokenBytes(std::size_t token) const {
return (token + 1 >= start_.size() ? char_.size() : start_[token + 1]) -
start_[token];
}
- std::vector<size_t> start_;
- size_t nextStart_{0};
+ std::vector<std::size_t> start_;
+ std::size_t nextStart_{0};
std::vector<char> char_;
OffsetToProvenanceMappings provenances_;
};
diff --git a/flang/lib/parser/unparse.cc b/flang/lib/parser/unparse.cc
index fc8a463..146bc03 100644
--- a/flang/lib/parser/unparse.cc
+++ b/flang/lib/parser/unparse.cc
@@ -8,6 +8,8 @@
#include "parse-tree-visitor.h"
#include "parse-tree.h"
#include <algorithm>
+#include <cinttypes>
+#include <cstddef>
namespace Fortran {
namespace parser {
@@ -59,6 +61,11 @@
void Post(const ProgramUnit &x) { // R502, R503
out_ << '\n'; // blank line after each ProgramUnit
}
+
+ bool Pre(const Name &x) { // R603
+ Put(x.source.ToString());
+ return false;
+ }
bool Pre(const DefinedOperator::IntrinsicOperator &x) { // R608
switch (x) {
case DefinedOperator::IntrinsicOperator::Power: Put("**"); break;
@@ -150,8 +157,7 @@
return false;
}
bool Pre(const RealLiteralConstant &x) { // R714, R715
- Put(x.intPart), Put('.'), Put(x.fraction), Walk(x.exponent);
- Walk("_", x.kind);
+ Put(x.real.source.ToString()), Walk("_", x.kind);
return false;
}
bool Pre(const ComplexLiteralConstant &x) { // R718 - R720
@@ -190,7 +196,7 @@
return false;
}
bool Pre(const HollerithLiteralConstant &x) {
- std::optional<size_t> chars{CountCharacters(x.v.data(), x.v.size(),
+ std::optional<std::size_t> chars{CountCharacters(x.v.data(), x.v.size(),
encoding_ == Encoding::EUC_JP ? EUC_JPCharacterBytes
: UTF8CharacterBytes)};
if (chars.has_value()) {
@@ -208,7 +214,7 @@
}
bool Pre(const DerivedTypeStmt &x) { // R727
Word("TYPE"), Walk(", ", std::get<std::list<TypeAttrSpec>>(x.t), ", ");
- Put(" :: "), Put(std::get<Name>(x.t));
+ Put(" :: "), Walk(std::get<Name>(x.t));
Walk("(", std::get<std::list<Name>>(x.t), ", ", ")");
Indent();
return false;
@@ -237,7 +243,7 @@
return false;
}
bool Pre(const TypeParamDecl &x) { // R733
- Put(std::get<Name>(x.t));
+ Walk(std::get<Name>(x.t));
Walk("=", std::get<std::optional<ScalarIntConstantExpr>>(x.t));
return false;
}
@@ -720,7 +726,11 @@
return false;
}
bool Pre(const LetterSpec &x) { // R865
- Put(std::get<char>(x.t)), Walk("-", std::get<std::optional<char>>(x.t));
+ Put(*std::get<const char *>(x.t));
+ auto second = std::get<std::optional<const char *>>(x.t);
+ if (second.has_value()) {
+ Put('-'), Put(**second);
+ }
return false;
}
bool Pre(const ImportStmt &x) { // R867
@@ -742,7 +752,7 @@
return false;
}
bool Pre(const NamelistStmt::Group &x) {
- Put('/'), Put(std::get<Name>(x.t)), Put('/');
+ Put('/'), Walk(std::get<Name>(x.t)), Put('/');
Walk(std::get<std::list<Name>>(x.t), ", ");
return false;
}
@@ -971,7 +981,7 @@
return false;
}
bool Pre(const DefinedOpName &x) { // R1003, R1023, R1414, & R1415
- Put('.'), Put(x.v), Put('.');
+ Put('.'), Walk(x.v), Put('.');
return false;
}
bool Pre(const AssignmentStmt &x) { // R1032
@@ -2058,7 +2068,7 @@
}
// Traverse a std::tuple<>, with an optional separator.
- template<size_t J = 0, typename T>
+ template<std::size_t J = 0, typename T>
void WalkTupleElements(const T &tuple, const char *separator) {
if constexpr (J < std::tuple_size_v<T>) {
if (J > 0) {
diff --git a/flang/lib/parser/user-state.h b/flang/lib/parser/user-state.h
index 948dab5..9904d9e 100644
--- a/flang/lib/parser/user-state.h
+++ b/flang/lib/parser/user-state.h
@@ -6,9 +6,9 @@
// parse tree construction so as to avoid any need for representing
// state in static data.
+#include "char-block.h"
#include <cinttypes>
#include <set>
-#include <string>
#include <unordered_set>
namespace Fortran {
@@ -35,10 +35,10 @@
}
}
- void NoteDefinedOperator(const std::string &opr) {
+ void NoteDefinedOperator(const CharBlock &opr) {
definedOperators_.insert(opr);
}
- bool IsDefinedOperator(const std::string &opr) const {
+ bool IsDefinedOperator(const CharBlock &opr) const {
return definedOperators_.find(opr) != definedOperators_.end();
}
@@ -46,7 +46,7 @@
std::unordered_set<Label> doLabels_;
int nonlabelDoConstructNestingDepth_{0};
- std::set<std::string> definedOperators_;
+ std::set<CharBlock> definedOperators_;
};
} // namespace parser
} // namespace Fortran
diff --git a/flang/lib/semantics/make-types.cc b/flang/lib/semantics/make-types.cc
index 5cb9255..c23b492 100644
--- a/flang/lib/semantics/make-types.cc
+++ b/flang/lib/semantics/make-types.cc
@@ -44,7 +44,7 @@
}
bool Pre(const parser::TypeAttrSpec::Extends &x) {
- builder_->extends(x.v);
+ builder_->extends(x.v.source.ToString());
return false;
}
bool Pre(const parser::AccessSpec &x) {
@@ -165,7 +165,7 @@
void Post(const parser::ProcDecl &x) {
const auto &name = std::get<parser::Name>(x.t);
//TODO: std::get<std::optional<ProcPointerInit>>(x.t)
- builder_->procComponent(ProcComponentDef(ProcDecl(name), *attrs_));
+ builder_->procComponent(ProcComponentDef(ProcDecl(name.source.ToString()), *attrs_));
}
bool Pre(const parser::DataComponentDefStmt &x) {
@@ -192,7 +192,7 @@
? *arraySpec_
: attrArraySpec_ ? *attrArraySpec_ : ComponentArraySpec{};
builder_->dataComponent(
- DataComponentDef(*declTypeSpec_, name, *attrs_, arraySpec));
+ DataComponentDef(*declTypeSpec_, name.source.ToString(), *attrs_, arraySpec));
arraySpec_.reset();
}
@@ -289,7 +289,7 @@
return true;
}
void Post(const parser::DerivedTypeStmt &x) {
- builder_->name(std::get<Name>(x.t));
+ builder_->name(std::get<parser::Name>(x.t).source.ToString());
builder_->attrs(*attrs_);
attrs_.release();
}