-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[clang][scandeps] Improve handling of rawstrings. #139504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: Tobias Hieta (tru) Changes: The current parser only checks one character back for an R before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and can have constructs like: R\ "str" (an R followed by a backslash-escaped newline, then the string), and much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner. This was co-authored by Sylvain Audi <[email protected]> (@sylvain-audi) Fixes #137648 Full diff: https://github.com/llvm/llvm-project/pull/139504.diff 2 Files Affected:
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..86e860abdbbdc 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -206,6 +206,24 @@ static void skipOverSpaces(const char *&First, const char *const End) {
++First;
}
+// Move back by one character, skipping escaped newlines (backslash + \n)
+static char previousChar(const char *First, const char *&Current) {
+ assert(Current > First);
+ --Current;
+ while (Current > First + 1 && isVerticalWhitespace(*Current)) {
+ const char PrevChar = *(Current - 1);
+ if (PrevChar == '\\') {
+ Current -= 2; // backslash + (\n or \r)
+ } else if (Current > First + 2 && isVerticalWhitespace(PrevChar) &&
+ PrevChar != *Current && *(Current - 2) == '\\') {
+ Current -= 3; // backslash + (\n\r or \r\n)
+ } else {
+ break;
+ }
+ }
+ return *Current;
+}
+
[[nodiscard]] static bool isRawStringLiteral(const char *First,
const char *Current) {
assert(First <= Current);
@@ -215,25 +233,28 @@ static void skipOverSpaces(const char *&First, const char *const End) {
return false;
// Check for an "R".
- --Current;
- if (*Current != 'R')
+ if (previousChar(First, Current) != 'R')
return false;
- if (First == Current || !isAsciiIdentifierContinue(*--Current))
+ if (First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current)))
return true;
// Check for a prefix of "u", "U", or "L".
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
// Check for a prefix of "u8".
- if (*Current != '8' || First == Current || *Current-- != 'u')
+ if (*Current != '8' || First == Current ||
+ previousChar(First, Current) != 'u')
return false;
- return First == Current || !isAsciiIdentifierContinue(*--Current);
+ return First == Current ||
+ !isAsciiIdentifierContinue(previousChar(First, Current));
}
static void skipRawString(const char *&First, const char *const End) {
assert(First[0] == '"');
- assert(First[-1] == 'R');
+ //assert(First[-1] == 'R');
const char *Last = ++First;
while (Last != End && *Last != '(')
@@ -416,6 +437,14 @@ void Scanner::skipLine(const char *&First, const char *const End) {
continue;
}
+ // Continue on the same line if an EOL is preceded with backslash
+ if (First + 1 < End && *First == '\\') {
+ if (unsigned Len = isEOL(First + 1, End)) {
+ First += 1 + Len;
+ continue;
+ }
+ }
+
// Iterate over comments correctly.
if (*First != '/' || End - First < 2) {
LastTokenPtr = First;
diff --git a/clang/test/ClangScanDeps/raw-strings.cpp b/clang/test/ClangScanDeps/raw-strings.cpp
new file mode 100644
index 0000000000000..5fda4a559c9e3
--- /dev/null
+++ b/clang/test/ClangScanDeps/raw-strings.cpp
@@ -0,0 +1,55 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+//--- cdb.json.in
+[{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu.c -o DIR/tu.o -IDIR/include",
+ "file": "DIR/tu.c"
+}]
+//--- include/header.h
+//--- include/header2.h
+//--- include/header3.h
+//--- include/header4.h
+//--- tu.c
+#if 0
+R"x()x"
+#endif
+
+#include "header.h"
+
+#if 0
+R"y(";
+#endif
+#include "header2.h"
+
+#if 0
+//")y"
+#endif
+
+#if 0
+R"y(";
+R"z()y";
+#endif
+#include "header3.h"
+#if 0
+//")z"
+#endif
+
+#if 0
+R\
+"y(";
+R"z()y";
+#endif
+#include "header4.h"
+#if 0
+//")z"
+#endif
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess | FileCheck %s
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess-dependency-directives | FileCheck %s
+// CHECK: tu.c
+// CHECK-NEXT: header.h
+// CHECK-NEXT: header3.h
+// CHECK-NEXT: header4.h
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
The current parser only checks one character back for an R before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and can have constructs like: R\ "str" (an R followed by a backslash-escaped newline, then the string), and much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner. This was co-authored by Sylvain Audi <[email protected]> Fixes llvm#137648
This is so that we can use it in the dependency scanner without duplicating the logic there.
As suggested in the review, use Lexer::getEscapedNewLineSize() instead of implementing our own logic for it.
b722c2e
to
63ca116
Compare
Sorry for the delay. I have addressed both of your comments now, @benlangmuir: I reworked it to use Lexer::getEscapedNewLineSize() and removed the assert line. I also expanded the test coverage. |
The current parser only checks one character back for an R before each string to decide whether it is a raw string. But the preprocessor is much more advanced here and can have constructs like:
R\
"str"
and much more. This patch also adds more test coverage for raw strings in the DependencyDirectivesScanner.
This was co-authored by Sylvain Audi [email protected] (@sylvain-audi)
Fixes #137648