-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[clang][scandeps] Improve handling of rawstrings. #139504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
The current parser only checks one character back for an `R` before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and accepts constructs such as `R\` followed by a newline and then `"str"` (an escaped newline between the raw-string prefix and the opening quote), and much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner. This was co-authored by Sylvain Audi <[email protected]> Fixes llvm#137648
@llvm/pr-subscribers-clang Author: Tobias Hieta (tru) Changes: The current parser only checks one character back for an `R` before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and accepts constructs such as `R\` followed by a newline and then `"str"`, and much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner. This was co-authored by Sylvain Audi <[email protected]> (@sylvain-audi) Fixes #137648 Full diff: https://github.com/llvm/llvm-project/pull/139504.diff 2 Files Affected:
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..86e860abdbbdc 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -206,6 +206,24 @@ static void skipOverSpaces(const char *&First, const char *const End) {
++First;
}
+// Move back by one character, skipping escaped newlines (backslash + \n)
+static char previousChar(const char *First, const char *&Current) {
+ assert(Current > First);
+ --Current;
+ while (Current > First + 1 && isVerticalWhitespace(*Current)) {
+ const char PrevChar = *(Current - 1);
+ if (PrevChar == '\\') {
+ Current -= 2; // backslash + (\n or \r)
+ } else if (Current > First + 2 && isVerticalWhitespace(PrevChar) &&
+ PrevChar != *Current && *(Current - 2) == '\\') {
+ Current -= 3; // backslash + (\n\r or \r\n)
+ } else {
+ break;
+ }
+ }
+ return *Current;
+}
+
[[nodiscard]] static bool isRawStringLiteral(const char *First,
const char *Current) {
assert(First <= Current);
@@ -215,25 +233,28 @@ static void skipOverSpaces(const char *&First, const char *const End) {
return false;
// Check for an "R".
- --Current;
- if (*Current != 'R')
+ if (previousChar(First, Current) != 'R')
return false;
- if (First == Current || !isAsciiIdentifierContinue(*--Current))
+ if (First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current)))
return true;
// Check for a prefix of "u", "U", or "L".
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
// Check for a prefix of "u8".
- if (*Current != '8' || First == Current || *Current-- != 'u')
+ if (*Current != '8' || First == Current ||
+ previousChar(First, Current) != 'u')
return false;
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
}
static void skipRawString(const char *&First, const char *const End) {
assert(First[0] == '"');
- assert(First[-1] == 'R');
+ //assert(First[-1] == 'R');
const char *Last = ++First;
while (Last != End && *Last != '(')
@@ -416,6 +437,14 @@ void Scanner::skipLine(const char *&First, const char *const End) {
continue;
}
+ // Continue on the same line if an EOL is preceded with backslash
+ if (First + 1 < End && *First == '\\') {
+ if (unsigned Len = isEOL(First + 1, End)) {
+ First += 1 + Len;
+ continue;
+ }
+ }
+
// Iterate over comments correctly.
if (*First != '/' || End - First < 2) {
LastTokenPtr = First;
diff --git a/clang/test/ClangScanDeps/raw-strings.cpp b/clang/test/ClangScanDeps/raw-strings.cpp
new file mode 100644
index 0000000000000..5fda4a559c9e3
--- /dev/null
+++ b/clang/test/ClangScanDeps/raw-strings.cpp
@@ -0,0 +1,55 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+//--- cdb.json.in
+[{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu.c -o DIR/tu.o -IDIR/include",
+ "file": "DIR/tu.c"
+}]
+//--- include/header.h
+//--- include/header2.h
+//--- include/header3.h
+//--- include/header4.h
+//--- tu.c
+#if 0
+R"x()x"
+#endif
+
+#include "header.h"
+
+#if 0
+R"y(";
+#endif
+#include "header2.h"
+
+#if 0
+//")y"
+#endif
+
+#if 0
+R"y(";
+R"z()y";
+#endif
+#include "header3.h"
+#if 0
+//")z"
+#endif
+
+#if 0
+R\
+"y(";
+R"z()y";
+#endif
+#include "header4.h"
+#if 0
+//")z"
+#endif
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess | FileCheck %s
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess-dependency-directives | FileCheck %s
+// CHECK: tu.c
+// CHECK-NEXT: header.h
+// CHECK-NEXT: header3.h
+// CHECK-NEXT: header4.h
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- clang/test/ClangScanDeps/raw-strings.cpp clang/lib/Lex/DependencyDirectivesScanner.cpp
View the diff from clang-format here.
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 86e860abd..6565e77af 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -254,7 +254,7 @@ static char previousChar(const char *First, const char *&Current) {
static void skipRawString(const char *&First, const char *const End) {
assert(First[0] == '"');
- //assert(First[-1] == 'R');
+ // assert(First[-1] == 'R');
const char *Last = ++First;
while (Last != End && *Last != '(')
|
} | ||
|
||
static void skipRawString(const char *&First, const char *const End) { | ||
assert(First[0] == '"'); | ||
assert(First[-1] == 'R'); | ||
//assert(First[-1] == 'R'); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please remove this line
static char previousChar(const char *First, const char *&Current) { | ||
assert(Current > First); | ||
--Current; | ||
while (Current > First + 1 && isVerticalWhitespace(*Current)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Lexer allows trailing horizontal whitespace after the `\` (with a warning). Maybe we should make `Lexer::getEscapedNewLineSize` public and use that here to keep it in sync?
The current parser only checks one character back for an R before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and can have constructs like:
R\
"str"
And much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner.
This was co-authored by Sylvain Audi [email protected] (@sylvain-audi)
Fixes #137648