[libc++][regex] Validate backreferences in the constructor.
This patch enables throwing exceptions for invalid backreferences
in the constructor when using the basic, extended, grep, or egrep grammar.
This fixes LLVM bug 34297 (https://bugs.llvm.org/show_bug.cgi?id=34297).
Differential Revision: https://reviews.llvm.org/D62453
diff --git a/libcxx/include/regex b/libcxx/include/regex
index e349fa6..68cfbb2 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -4661,6 +4661,8 @@
unsigned __val = __traits_.value(c, 10);
if (__val >= 1 && __val <= 9)
{
+ if (__val > mark_count())
+ __throw_regex_error<regex_constants::error_backref>();
__push_back_ref(__val);
return true;
}
diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
index 0a15b64..08a1444 100644
--- a/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
+++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
@@ -18,11 +18,11 @@
#include <cassert>
#include "test_macros.h"
-static bool error_badbackref_thrown(const char *pat)
+static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
{
bool result = false;
try {
- std::regex re(pat);
+ std::regex re(pat, f);
} catch (const std::regex_error &ex) {
result = (ex.code() == std::regex_constants::error_backref);
}
@@ -31,9 +31,25 @@
int main(int, char**)
{
- assert(error_badbackref_thrown("\\1abc")); // no references
- assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
- assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
+// no references
+ assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1abd", std::regex::basic));
+ assert(error_badbackref_thrown("\\1abd", std::regex::extended));
+ assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1abd", std::regex::grep));
+ assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
+
+// only one reference
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
+
+
+ assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
// this should NOT throw, because we only should look at the '1'
// See https://bugs.llvm.org/show_bug.cgi?id=31387
@@ -42,5 +58,35 @@
std::regex re(pat1, pat1 + 7); // extra chars after the end.
}
+// reference before group
+ assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
+
+// reference limit
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
+
+// https://bugs.llvm.org/show_bug.cgi?id=34297
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
+
return 0;
}