Introduce common browser/web API for validation of custom handlers

Logic to validate custom handlers is required on both the web and
browser processes. This CL introduces a new API in
third_party/blink/public/common in order to reduce duplication. As a
starting point, a new helper function makes it possible to verify whether
the following condition is satisfied [1]:

> If scheme is neither a safelisted scheme nor a string starting with
> "web+" followed by one or more ASCII lower alphas

In order to keep this CL small, more advanced aspects like the same-origin
condition (currently performed in WebContentsImpl), validation of the
schemes of the registered URLs [2] [3] or other tests that are currently
only performed on the web process are not considered. This can be refined
later if needed.

This CL makes the check on the browser process slightly stronger.
Previously, the only requirement for schemes starting with "web+" was
that they not be exactly equal to "web+".

This CL might also make verification on the web process slightly less
efficient, if the conversion from WTF::String to base::StringPiece
requires a buffer allocation. However, it seems unlikely to be a
performance bottleneck for the current use cases.

[1] https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
[2] https://crbug.com/1112268
[3] https://crbug.com/64100

Bug: 971917, 952974
Change-Id: Iaada22200d7b7d834ad878bbc51cc40ea67d6332
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2362802
Commit-Queue: Frédéric Wang <[email protected]>
Reviewed-by: Mike West <[email protected]>
Reviewed-by: Dominick Ng <[email protected]>
Cr-Commit-Position: refs/heads/master@{#800948}
diff --git a/chrome/browser/custom_handlers/protocol_handler_registry_unittest.cc b/chrome/browser/custom_handlers/protocol_handler_registry_unittest.cc
index 550f163..252331d 100644
--- a/chrome/browser/custom_handlers/protocol_handler_registry_unittest.cc
+++ b/chrome/browser/custom_handlers/protocol_handler_registry_unittest.cc
@@ -1072,6 +1072,36 @@
 }
 
 // See
+// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
+TEST_F(ProtocolHandlerRegistryTest, WebPlusPrefix) {
+  // Not ASCII alphas.
+  registry()->OnAcceptRegisterProtocolHandler(CreateProtocolHandler(
+      "web+***", GURL("https://www.google.com/handler%s")));
+  ASSERT_FALSE(registry()->IsHandledProtocol("web+***"));
+  registry()->OnAcceptRegisterProtocolHandler(CreateProtocolHandler(
+      "web+123", GURL("https://www.google.com/handler%s")));
+  ASSERT_FALSE(registry()->IsHandledProtocol("web+123"));
+  registry()->OnAcceptRegisterProtocolHandler(CreateProtocolHandler(
+      "web+   ", GURL("https://www.google.com/handler%s")));
+  ASSERT_FALSE(registry()->IsHandledProtocol("web+   "));
+  registry()->OnAcceptRegisterProtocolHandler(CreateProtocolHandler(
+      "web+name123", GURL("https://www.google.com/handler%s")));
+  ASSERT_FALSE(registry()->IsHandledProtocol("web+name123"));
+
+  // ASCII lower alphas.
+  registry()->OnAcceptRegisterProtocolHandler(
+      CreateProtocolHandler("web+abcdefghijklmnopqrstuvwxyz",
+                            GURL("https://www.google.com/handler%s")));
+  ASSERT_TRUE(registry()->IsHandledProtocol("web+abcdefghijklmnopqrstuvwxyz"));
+
+  // ASCII upper alphas are lowercased.
+  registry()->OnAcceptRegisterProtocolHandler(
+      CreateProtocolHandler("web+ZYXWVUTSRQPONMLKJIHGFEDCBA",
+                            GURL("https://www.google.com/handler%s")));
+  ASSERT_TRUE(registry()->IsHandledProtocol("web+zyxwvutsrqponmlkjihgfedcba"));
+}
+
+// See
 // https://html.spec.whatwg.org/multipage/system-state.html#safelisted-scheme
 TEST_F(ProtocolHandlerRegistryTest, SafelistedSchemes) {
   std::string schemes[] = {
diff --git a/chrome/common/custom_handlers/protocol_handler.cc b/chrome/common/custom_handlers/protocol_handler.cc
index e6adcc0..e76a088 100644
--- a/chrome/common/custom_handlers/protocol_handler.cc
+++ b/chrome/common/custom_handlers/protocol_handler.cc
@@ -12,6 +12,7 @@
 #include "content/public/common/origin_util.h"
 #include "extensions/common/constants.h"
 #include "net/base/escape.h"
+#include "third_party/blink/public/common/custom_handlers/protocol_handler_utils.h"
 #include "ui/base/l10n/l10n_util.h"
 
 ProtocolHandler::ProtocolHandler(const std::string& protocol,
@@ -39,33 +40,15 @@
 bool ProtocolHandler::IsValid() const {
   // TODO(https://crbug.com/977083): Consider limiting to secure contexts.
 
-  // This matches SupportedSchemes() in blink's NavigatorContentUtils.
-
-  // Although not enforced in the spec the spec gives freedom to do additional
-  // security checks. Bugs have arisen from allowing non-http/https URLs, e.g.
-  // https://crbug.com/971917 so we check this here.
+  // This matches VerifyCustomHandlerURLSecurity() in blink's
+  // NavigatorContentUtils.
   if (!url_.SchemeIsHTTPOrHTTPS() &&
       !url_.SchemeIs(extensions::kExtensionScheme)) {
     return false;
   }
 
-  // From:
-  // https://html.spec.whatwg.org/multipage/system-state.html#safelisted-scheme
-  static constexpr const char* const kProtocolSafelist[] = {
-      "bitcoin",  "cabal",       "dat",    "did",    "doi",   "dweb",
-      "ethereum", "geo",         "hyper",  "im",     "ipfs",  "ipns",
-      "irc",      "ircs",        "magnet", "mailto", "mms",   "news",
-      "nntp",     "openpgp4fpr", "sip",    "sms",    "smsto", "ssb",
-      "ssh",      "tel",         "urn",    "webcal", "wtai",  "xmpp"};
-  static constexpr const char kWebPrefix[] = "web+";
-
-  bool has_web_prefix =
-      base::StartsWith(protocol_, kWebPrefix,
-                       base::CompareCase::INSENSITIVE_ASCII) &&
-      protocol_ != kWebPrefix;
-
-  return has_web_prefix ||
-         base::Contains(kProtocolSafelist, base::ToLowerASCII(protocol_));
+  bool has_custom_scheme_prefix;
+  return blink::IsValidCustomHandlerScheme(protocol_, has_custom_scheme_prefix);
 }
 
 bool ProtocolHandler::IsSameOrigin(
diff --git a/third_party/blink/common/BUILD.gn b/third_party/blink/common/BUILD.gn
index 6a098d9c..befabe9f 100644
--- a/third_party/blink/common/BUILD.gn
+++ b/third_party/blink/common/BUILD.gn
@@ -76,6 +76,7 @@
     "browser_interface_broker_proxy.cc",
     "cache_storage/cache_storage_utils.cc",
     "client_hints/client_hints.cc",
+    "custom_handlers/protocol_handler_utils.cc",
     "device_memory/approximated_device_memory.cc",
     "dom_storage/session_storage_namespace_id.cc",
     "experiments/memory_ablation_experiment.cc",
diff --git a/third_party/blink/common/custom_handlers/protocol_handler_utils.cc b/third_party/blink/common/custom_handlers/protocol_handler_utils.cc
new file mode 100644
index 0000000..d576c3a
--- /dev/null
+++ b/third_party/blink/common/custom_handlers/protocol_handler_utils.cc
@@ -0,0 +1,42 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "third_party/blink/public/common/custom_handlers/protocol_handler_utils.h"
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+
+namespace blink {
+
+bool IsValidCustomHandlerScheme(const base::StringPiece scheme,
+                                bool& has_custom_scheme_prefix) {
+  has_custom_scheme_prefix = false;
+
+  static constexpr const char kWebPrefix[] = "web+";
+  static constexpr const size_t kWebPrefixLength = base::size(kWebPrefix) - 1;
+  if (base::StartsWith(scheme, kWebPrefix,
+                       base::CompareCase::INSENSITIVE_ASCII)) {
+    has_custom_scheme_prefix = true;
+    // HTML5 requires that schemes with the |web+| prefix contain one or more
+    // ASCII alphas after that prefix.
+    auto scheme_name = scheme.substr(kWebPrefixLength);
+    if (scheme_name.empty())
+      return false;
+    for (auto& character : scheme_name) {
+      if (!base::IsAsciiAlpha(character))
+        return false;
+    }
+    return true;
+  }
+
+  static constexpr const char* const kProtocolSafelist[] = {
+      "bitcoin",  "cabal",       "dat",    "did",    "doi",   "dweb",
+      "ethereum", "geo",         "hyper",  "im",     "ipfs",  "ipns",
+      "irc",      "ircs",        "magnet", "mailto", "mms",   "news",
+      "nntp",     "openpgp4fpr", "sip",    "sms",    "smsto", "ssb",
+      "ssh",      "tel",         "urn",    "webcal", "wtai",  "xmpp"};
+  return base::Contains(kProtocolSafelist, base::ToLowerASCII(scheme));
+}
+
+}  // namespace blink
diff --git a/third_party/blink/public/common/BUILD.gn b/third_party/blink/public/common/BUILD.gn
index bdbb9b0f..a5190a7 100644
--- a/third_party/blink/public/common/BUILD.gn
+++ b/third_party/blink/public/common/BUILD.gn
@@ -71,6 +71,7 @@
     "css/page_size_type.h",
     "css/preferred_color_scheme.h",
     "css/screen_spanning.h",
+    "custom_handlers/protocol_handler_utils.h",
     "device_memory/approximated_device_memory.h",
     "dom_storage/session_storage_namespace_id.h",
     "experiments/memory_ablation_experiment.h",
diff --git a/third_party/blink/public/common/custom_handlers/protocol_handler_utils.h b/third_party/blink/public/common/custom_handlers/protocol_handler_utils.h
new file mode 100644
index 0000000..45bf8f4
--- /dev/null
+++ b/third_party/blink/public/common/custom_handlers/protocol_handler_utils.h
@@ -0,0 +1,19 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_BLINK_PUBLIC_COMMON_CUSTOM_HANDLERS_PROTOCOL_HANDLER_UTILS_H_
+#define THIRD_PARTY_BLINK_PUBLIC_COMMON_CUSTOM_HANDLERS_PROTOCOL_HANDLER_UTILS_H_
+
+#include "base/strings/string_piece_forward.h"
+#include "third_party/blink/public/common/common_export.h"
+
+namespace blink {
+
+bool BLINK_COMMON_EXPORT
+IsValidCustomHandlerScheme(const base::StringPiece scheme,
+                           bool& has_custom_scheme_prefix);
+
+}  // namespace blink
+
+#endif  // THIRD_PARTY_BLINK_PUBLIC_COMMON_CUSTOM_HANDLERS_PROTOCOL_HANDLER_UTILS_H_
diff --git a/third_party/blink/renderer/modules/navigatorcontentutils/navigator_content_utils.cc b/third_party/blink/renderer/modules/navigatorcontentutils/navigator_content_utils.cc
index f168e6b7..832d56c 100644
--- a/third_party/blink/renderer/modules/navigatorcontentutils/navigator_content_utils.cc
+++ b/third_party/blink/renderer/modules/navigatorcontentutils/navigator_content_utils.cc
@@ -27,6 +27,7 @@
 #include "third_party/blink/renderer/modules/navigatorcontentutils/navigator_content_utils.h"
 
 #include "base/stl_util.h"
+#include "third_party/blink/public/common/custom_handlers/protocol_handler_utils.h"
 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
 #include "third_party/blink/renderer/core/frame/local_frame.h"
 #include "third_party/blink/renderer/core/frame/web_local_frame_impl.h"
@@ -36,6 +37,7 @@
 #include "third_party/blink/renderer/platform/weborigin/security_origin.h"
 #include "third_party/blink/renderer/platform/wtf/std_lib_extras.h"
 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
+#include "third_party/blink/renderer/platform/wtf/text/string_utf8_adaptor.h"
 
 namespace blink {
 
@@ -45,21 +47,6 @@
 
 const char kToken[] = "%s";
 
-// Changes to this list must be kept in sync with the browser-side checks in
-// /chrome/common/custom_handlers/protocol_handler.cc.
-static const HashSet<String>& SupportedSchemes() {
-  DEFINE_STATIC_LOCAL(
-      HashSet<String>, supported_schemes,
-      ({
-          "bitcoin",     "cabal",  "dat",    "did",   "dweb", "ethereum",
-          "geo",         "hyper",  "im",     "ipfs",  "ipns", "irc",
-          "ircs",        "magnet", "mailto", "mms",   "news", "nntp",
-          "openpgp4fpr", "sip",    "sms",    "smsto", "ssb",  "ssh",
-          "tel",         "urn",    "webcal", "wtai",  "xmpp",
-      }));
-  return supported_schemes;
-}
-
 static bool VerifyCustomHandlerURLSecurity(const LocalDOMWindow& window,
                                            const KURL& full_url,
                                            String& error_message) {
@@ -108,22 +95,6 @@
   return true;
 }
 
-// HTML5 requires that schemes with the |web+| prefix contain one or more
-// ASCII alphas after that prefix.
-static bool IsValidWebSchemeName(const String& protocol) {
-  if (protocol.length() < 5)
-    return false;
-
-  unsigned protocol_length = protocol.length();
-  for (unsigned i = 4; i < protocol_length; i++) {
-    char c = protocol[i];
-    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
-      return false;
-    }
-  }
-  return true;
-}
-
 }  // namespace
 
 bool VerifyCustomHandlerScheme(const String& scheme, String& error_string) {
@@ -133,24 +104,25 @@
     return false;
   }
 
-  if (scheme.StartsWithIgnoringASCIICase("web+")) {
-    if (IsValidWebSchemeName(scheme))
-      return true;
-    error_string =
-        "The scheme name '" + scheme +
-        "' is not allowed. Schemes starting with 'web+' must be followed by "
-        "one or more ASCII letters.";
+  bool has_custom_scheme_prefix;
+  StringUTF8Adaptor scheme_adaptor(scheme);
+  if (!IsValidCustomHandlerScheme(scheme_adaptor.AsStringPiece(),
+                                  has_custom_scheme_prefix)) {
+    if (has_custom_scheme_prefix) {
+      error_string =
+          "The scheme name '" + scheme +
+          "' is not allowed. Schemes starting with 'web+' must be followed by "
+          "one or more ASCII letters.";
+    } else {
+      error_string = "The scheme '" + scheme +
+                     "' doesn't belong to the scheme allowlist. "
+                     "Please prefix non-allowlisted schemes "
+                     "with the string 'web+'.";
+    }
     return false;
   }
 
-  if (SupportedSchemes().Contains(scheme.LowerASCII()))
-    return true;
-
-  error_string = "The scheme '" + scheme +
-                 "' doesn't belong to the scheme allowlist. "
-                 "Please prefix non-allowlisted schemes "
-                 "with the string 'web+'.";
-  return false;
+  return true;
 }
 
 bool VerifyCustomHandlerURLSyntax(const KURL& full_url,