| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // Note that although this is not a "browser" test, it runs as part of |
| // browser_tests. This is because WebKit does not work properly if it is |
| // shutdown and re-initialized. Since browser_tests runs each test in a |
| // new process, this avoids the problem. |
| |
| #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
| |
| #include <string> |
| |
| #include "base/memory/scoped_ptr.h" |
| #include "base/sha2.h" |
| #include "base/string16.h" |
| #include "base/utf_string_conversions.h" |
| #include "chrome/renderer/safe_browsing/client_model.pb.h" |
| #include "chrome/renderer/safe_browsing/features.h" |
| #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" |
| #include "chrome/renderer/safe_browsing/render_view_fake_resources_test.h" |
| #include "chrome/renderer/safe_browsing/scorer.h" |
| #include "testing/gmock/include/gmock/gmock.h" |
| |
| namespace safe_browsing { |
| |
| class PhishingClassifierTest : public RenderViewFakeResourcesTest { |
| protected: |
| virtual void SetUp() { |
| // Set up WebKit and the RenderView. |
| RenderViewFakeResourcesTest::SetUp(); |
| |
| // Construct a model to test with. We include one feature from each of |
| // the feature extractors, which allows us to verify that they all ran. |
| ClientSideModel model; |
| model.add_hashes(base::SHA256HashString(features::kUrlTldToken + |
| std::string("net"))); |
| model.add_hashes(base::SHA256HashString(features::kPageLinkDomain + |
| std::string("phishing.com"))); |
| model.add_hashes(base::SHA256HashString(features::kPageTerm + |
| std::string("login"))); |
| model.add_hashes(base::SHA256HashString("login")); |
| |
| // Add a default rule with a non-phishy weight. |
| ClientSideModel::Rule* rule = model.add_rule(); |
| rule->set_weight(-1.0); |
| |
| // To give a phishy score, the total weight needs to be >= 0 |
| // (0.5 when converted to a probability). This will only happen |
| // if all of the listed features are present. |
| rule = model.add_rule(); |
| rule->add_feature(0); |
| rule->add_feature(1); |
| rule->add_feature(2); |
| rule->set_weight(1.0); |
| |
| model.add_page_term(3); |
| model.add_page_word(3); |
| model.set_max_words_per_term(1); |
| |
| clock_ = new MockFeatureExtractorClock; |
| scorer_.reset(Scorer::Create(model.SerializeAsString())); |
| ASSERT_TRUE(scorer_.get()); |
| classifier_.reset(new PhishingClassifier(view_, clock_)); |
| } |
| |
| virtual void TearDown() { |
| RenderViewFakeResourcesTest::TearDown(); |
| } |
| |
| // Helper method to start phishing classification and wait for it to |
| // complete. Returns the success value from the PhishingClassifier's |
| // DoneCallback, and fills in phishy_score with the score. |
| bool RunPhishingClassifier(const string16* page_text, double* phishy_score) { |
| success_ = false; |
| *phishy_score = PhishingClassifier::kInvalidScore; |
| |
| classifier_->BeginClassification( |
| page_text, |
| NewCallback(this, &PhishingClassifierTest::ClassificationFinished)); |
| message_loop_.Run(); |
| |
| *phishy_score = phishy_score_; |
| return success_; |
| } |
| |
| // Completion callback for classification. |
| void ClassificationFinished(bool success, double phishy_score) { |
| success_ = success; |
| phishy_score_ = phishy_score; |
| message_loop_.Quit(); |
| } |
| |
| scoped_ptr<Scorer> scorer_; |
| scoped_ptr<PhishingClassifier> classifier_; |
| MockFeatureExtractorClock* clock_; // owned by classifier_ |
| |
| // These members hold the status from the most recent call to the |
| // ClassificationFinished callback. |
| bool success_; |
| double phishy_score_; |
| }; |
| |
| TEST_F(PhishingClassifierTest, TestClassification) { |
| // No scorer yet, so the classifier is not ready. |
| EXPECT_FALSE(classifier_->is_ready()); |
| |
| // Now set the scorer. |
| classifier_->set_phishing_scorer(scorer_.get()); |
| EXPECT_TRUE(classifier_->is_ready()); |
| |
| // This test doesn't exercise the extraction timing. |
| EXPECT_CALL(*clock_, Now()) |
| .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); |
| |
| responses_["http://host.net/"] = |
| "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"; |
| LoadURL("http://host.net/"); |
| |
| string16 page_text = ASCIIToUTF16("login"); |
| double phishy_score; |
| EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score)); |
| EXPECT_DOUBLE_EQ(0.5, phishy_score); |
| |
| // Change the link domain to something non-phishy. |
| responses_["http://host.net/"] = |
| "<html><body><a href=\"http://safe.com/\">login</a></body></html>"; |
| LoadURL("http://host.net/"); |
| |
| EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); |
| EXPECT_GE(phishy_score, 0.0); |
| EXPECT_LT(phishy_score, 0.5); |
| |
| // Extraction should fail for this case, since there is no TLD. |
| responses_["http://localhost/"] = "<html><body>content</body></html>"; |
| LoadURL("http://localhost/"); |
| EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); |
| EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); |
| |
| // Extraction should also fail for this case, because the URL is not http. |
| responses_["https://host.net/"] = "<html><body>secure</body></html>"; |
| LoadURL("https://host.net/"); |
| EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); |
| EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); |
| |
| // Extraction should fail for this case because the URL is a POST request. |
| LoadURLWithPost("http://host.net/"); |
| EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score)); |
| EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); |
| } |
| |
| } // namespace safe_browsing |