// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "chrome/browser/extensions/api/declarative/url_matcher.h"
6
7#include <algorithm>
[email protected]9947d0e2012-02-23 22:36:538#include <iterator>
[email protected]fb5bcc02012-02-17 14:05:429
10#include "base/logging.h"
11#include "googleurl/src/gurl.h"
12
13namespace extensions {
14
15// This set of classes implement a mapping of URL Component Patterns, such as
16// host_prefix, host_suffix, host_equals, ..., etc., to SubstringPatterns.
17//
18// The idea of this mapping is to reduce the problem of comparing many
19// URL Component Patterns against one URL to the problem of searching many
20// substrings in one string:
21//
//   ----------------------                    --------------------
//   | URL Query operator | ----translate----> | SubstringPattern |
//   ----------------------                    --------------------
//                                                      ^
//                                                      |
//                                                   compare
//                                                      |
//                                                      v
//   ----------------------                    --------------------
//   | URL to compare     |                    |                  |
//   | to all URL Query   | ----translate----> | String           |
//   | operators          |                    |                  |
//   ----------------------                    --------------------
35//
36// The reason for this problem reduction is that there are efficient algorithms
37// for searching many substrings in one string (see Aho-Corasick algorithm).
38//
39// Case 1: {host,path,query}_{prefix,suffix,equals} searches.
40// ==========================================================
41//
42// For searches in this class, we normalize URLs as follows:
43//
44// Step 1:
45// Remove scheme, port and segment from URL:
//   -> http://www.example.com:8080/index.html?search=foo#first_match becomes
47// www.example.com/index.html?search=foo
48//
49// We remove the scheme and port number because they can be checked later
50// in a secondary filter step. We remove the segment (the #... part) because
51// this is not guaranteed to be ASCII-7 encoded.
52//
53// Step 2:
54// Translate URL to String and add the following position markers:
55// - BU = Beginning of URL
56// - ED = End of Domain
57// - EP = End of Path
58// - EU = End of URL
59// Furthermore, the hostname is canonicalized to start with a ".".
60//
61// Position markers are represented as characters >127, which are therefore
62// guaranteed not to be part of the ASCII-7 encoded URL character set.
63//
64// -> www.example.com/index.html?search=foo becomes
65// BU .www.example.com ED /index.html EP ?search=foo EU
66//
67// -> www.example.com/index.html becomes
68// BU .www.example.com ED /index.html EP EU
69//
70// Step 3:
71// Translate URL Component Patterns as follows:
72//
73// host_prefix(prefix) = BU add_missing_dot_prefix(prefix)
74// -> host_prefix("www.example") = BU .www.example
75//
76// host_suffix(suffix) = suffix ED
77// -> host_suffix("example.com") = example.com ED
78// -> host_suffix(".example.com") = .example.com ED
79//
80// host_equals(domain) = BU add_missing_dot_prefix(domain) ED
81// -> host_equals("www.example.com") = BU .www.example.com ED
82//
83// Similarly for path query parameters ({path, query}_{prefix, suffix, equals}).
84//
85// With this, we can search the SubstringPatterns in the normalized URL.
86//
87//
88// Case 2: url_{prefix,suffix,equals,contains} searches.
89// =====================================================
90//
91// Step 1: as above
92//
93// Step 2:
94// Translate URL to String and add the following position markers:
95// - BU = Beginning of URL
96// - EU = End of URL
97// Furthermore, the hostname is canonicalized to start with a ".".
98//
99// -> www.example.com/index.html?search=foo becomes
100// BU .www.example.com/index.html?search=foo EU
101//
102// url_prefix(prefix) = BU add_missing_dot_prefix(prefix)
103// -> url_prefix("www.example") = BU .www.example
104//
105// url_contains(substring) = substring
106// -> url_contains("index") = index
107//
108//
109// Case 3: {host,path,query}_contains searches.
110// ============================================
111//
112// These kinds of searches are not supported directly but can be derived
113// by a combination of a url_contains() query followed by an explicit test:
114//
115// host_contains(str) = url_contains(str) followed by test whether str occurs
//                        in host component of original URL.
117// -> host_contains("example.co") = example.co
118// followed by gurl.host().find("example.co");
119//
120// [similarly for path_contains and query_contains].
121
122
123//
124// URLMatcherCondition
125//
126
127URLMatcherCondition::URLMatcherCondition()
128 : criterion_(HOST_PREFIX),
129 substring_pattern_(NULL) {}
130
131URLMatcherCondition::~URLMatcherCondition() {}
132
133URLMatcherCondition::URLMatcherCondition(
134 Criterion criterion,
135 const SubstringPattern* substring_pattern)
136 : criterion_(criterion),
137 substring_pattern_(substring_pattern) {}
138
139URLMatcherCondition::URLMatcherCondition(const URLMatcherCondition& rhs)
140 : criterion_(rhs.criterion_),
141 substring_pattern_(rhs.substring_pattern_) {}
142
143URLMatcherCondition& URLMatcherCondition::operator=(
144 const URLMatcherCondition& rhs) {
145 criterion_ = rhs.criterion_;
146 substring_pattern_ = rhs.substring_pattern_;
147 return *this;
148}
149
150bool URLMatcherCondition::operator<(const URLMatcherCondition& rhs) const {
151 if (criterion_ < rhs.criterion_) return true;
152 if (criterion_ > rhs.criterion_) return false;
153 if (substring_pattern_ != NULL && rhs.substring_pattern_ != NULL)
154 return *substring_pattern_ < *rhs.substring_pattern_;
155 if (substring_pattern_ == NULL && rhs.substring_pattern_ != NULL) return true;
156 // Either substring_pattern_ != NULL && rhs.substring_pattern_ == NULL,
157 // or both are NULL.
158 return false;
159}
160
161bool URLMatcherCondition::IsFullURLCondition() const {
162 // For these criteria the SubstringMatcher needs to be executed on the
163 // GURL that is canonlizaliced with
164 // URLMatcherConditionFactory::CanonicalizeURLForFullSearches.
165 switch (criterion_) {
166 case HOST_CONTAINS:
167 case PATH_CONTAINS:
168 case QUERY_CONTAINS:
169 case URL_PREFIX:
170 case URL_SUFFIX:
171 case URL_CONTAINS:
172 case URL_EQUALS:
173 return true;
174 default:
175 break;
176 }
177 return false;
178}
179
180bool URLMatcherCondition::IsMatch(
181 const std::set<SubstringPattern::ID>& matching_substring_patterns,
182 const GURL& url) const {
183 DCHECK(substring_pattern_);
184 if (matching_substring_patterns.find(substring_pattern_->id()) ==
185 matching_substring_patterns.end())
186 return false;
187 // The criteria HOST_CONTAINS, PATH_CONTAINS, QUERY_CONTAINS are based on
188 // a substring match on the raw URL. In case of a match, we need to verify
189 // that the match was found in the correct component of the URL.
190 switch (criterion_) {
191 case HOST_CONTAINS:
192 return url.host().find(substring_pattern_->pattern()) !=
193 std::string::npos;
194 case PATH_CONTAINS:
195 return url.path().find(substring_pattern_->pattern()) !=
196 std::string::npos;
197 case QUERY_CONTAINS:
198 return url.query().find(substring_pattern_->pattern()) !=
199 std::string::npos;
200 default:
201 break;
202 }
203 return true;
204}
205
206//
207// URLMatcherConditionFactory
208//
209
namespace {
// Position markers inserted into canonicalized URLs and patterns. Each is a
// one-character, NUL-terminated string whose character has the high bit set
// and is therefore not contained in the 7-bit ASCII used in GURLs.
//
// The values are written with static_cast<char> because a bare negative
// literal in a braced initializer is a narrowing conversion, which is
// ill-formed under C++11 on platforms where plain char is unsigned.
const char kBeginningOfURL[] = {static_cast<char>(-1), 0};
const char kEndOfDomain[] = {static_cast<char>(-2), 0};
const char kEndOfPath[] = {static_cast<char>(-3), 0};
const char kEndOfURL[] = {static_cast<char>(-4), 0};
}  // namespace
217
218URLMatcherConditionFactory::URLMatcherConditionFactory() : id_counter_(0) {}
219
220URLMatcherConditionFactory::~URLMatcherConditionFactory() {
221 STLDeleteElements(&pattern_singletons_);
222}
223
224std::string URLMatcherConditionFactory::CanonicalizeURLForComponentSearches(
225 const GURL& url) {
226 return kBeginningOfURL + CanonicalizeHostname(url.host()) + kEndOfDomain +
227 url.path() + kEndOfPath + (url.has_query() ? "?" + url.query() : "") +
228 kEndOfURL;
229}
230
231URLMatcherCondition URLMatcherConditionFactory::CreateHostPrefixCondition(
232 const std::string& prefix) {
233 return CreateCondition(URLMatcherCondition::HOST_PREFIX,
234 kBeginningOfURL + CanonicalizeHostname(prefix));
235}
236
237URLMatcherCondition URLMatcherConditionFactory::CreateHostSuffixCondition(
238 const std::string& suffix) {
239 return CreateCondition(URLMatcherCondition::HOST_SUFFIX,
240 suffix + kEndOfDomain);
241}
242
243URLMatcherCondition URLMatcherConditionFactory::CreateHostContainsCondition(
244 const std::string& str) {
245 return CreateCondition(URLMatcherCondition::HOST_CONTAINS, str);
246}
247
248URLMatcherCondition URLMatcherConditionFactory::CreateHostEqualsCondition(
249 const std::string& str) {
250 return CreateCondition(URLMatcherCondition::HOST_EQUALS,
251 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfDomain);
252}
253
254URLMatcherCondition URLMatcherConditionFactory::CreatePathPrefixCondition(
255 const std::string& prefix) {
256 return CreateCondition(URLMatcherCondition::PATH_PREFIX,
257 kEndOfDomain + prefix);
258}
259
260URLMatcherCondition URLMatcherConditionFactory::CreatePathSuffixCondition(
261 const std::string& suffix) {
262 return CreateCondition(URLMatcherCondition::HOST_SUFFIX, suffix + kEndOfPath);
263}
264
265URLMatcherCondition URLMatcherConditionFactory::CreatePathContainsCondition(
266 const std::string& str) {
267 return CreateCondition(URLMatcherCondition::PATH_CONTAINS, str);
268}
269
270URLMatcherCondition URLMatcherConditionFactory::CreatePathEqualsCondition(
271 const std::string& str) {
272 return CreateCondition(URLMatcherCondition::PATH_EQUALS,
273 kEndOfDomain + str + kEndOfPath);
274}
275
276URLMatcherCondition URLMatcherConditionFactory::CreateQueryPrefixCondition(
277 const std::string& prefix) {
278 return CreateCondition(URLMatcherCondition::QUERY_PREFIX,
279 kEndOfPath + prefix);
280}
281
282URLMatcherCondition URLMatcherConditionFactory::CreateQuerySuffixCondition(
283 const std::string& suffix) {
284 return CreateCondition(URLMatcherCondition::QUERY_SUFFIX, suffix + kEndOfURL);
285}
286
287URLMatcherCondition URLMatcherConditionFactory::CreateQueryContainsCondition(
288 const std::string& str) {
289 return CreateCondition(URLMatcherCondition::QUERY_CONTAINS, str);
290}
291
292URLMatcherCondition URLMatcherConditionFactory::CreateQueryEqualsCondition(
293 const std::string& str) {
294 return CreateCondition(URLMatcherCondition::QUERY_EQUALS,
295 kEndOfPath + str + kEndOfURL);
296}
297
298URLMatcherCondition
299 URLMatcherConditionFactory::CreateHostSuffixPathPrefixCondition(
300 const std::string& host_suffix,
301 const std::string& path_prefix) {
302 return CreateCondition(URLMatcherCondition::HOST_SUFFIX_PATH_PREFIX,
303 host_suffix + kEndOfDomain + path_prefix);
304}
305
306std::string URLMatcherConditionFactory::CanonicalizeURLForFullSearches(
307 const GURL& url) {
308 return kBeginningOfURL + CanonicalizeHostname(url.host()) + url.path() +
309 (url.has_query() ? "?" + url.query() : "") + kEndOfURL;
310}
311
312URLMatcherCondition URLMatcherConditionFactory::CreateURLPrefixCondition(
313 const std::string& prefix) {
314 return CreateCondition(URLMatcherCondition::URL_PREFIX,
315 kBeginningOfURL + CanonicalizeHostname(prefix));
316}
317
318URLMatcherCondition URLMatcherConditionFactory::CreateURLSuffixCondition(
319 const std::string& suffix) {
320 return CreateCondition(URLMatcherCondition::URL_SUFFIX, suffix + kEndOfURL);
321}
322
323URLMatcherCondition URLMatcherConditionFactory::CreateURLContainsCondition(
324 const std::string& str) {
325 return CreateCondition(URLMatcherCondition::URL_CONTAINS, str);
326}
327
328URLMatcherCondition URLMatcherConditionFactory::CreateURLEqualsCondition(
329 const std::string& str) {
330 return CreateCondition(URLMatcherCondition::QUERY_EQUALS,
331 kBeginningOfURL + CanonicalizeHostname(str) + kEndOfURL);
332}
333
334void URLMatcherConditionFactory::ForgetUnusedPatterns(
335 const std::set<SubstringPattern::ID>& used_patterns) {
336 PatternSingletons::iterator i = pattern_singletons_.begin();
337 while (i != pattern_singletons_.end()) {
[email protected]2fb51d92012-02-17 15:05:47338 if (used_patterns.find((*i)->id()) != used_patterns.end()) {
[email protected]fb5bcc02012-02-17 14:05:42339 ++i;
[email protected]2fb51d92012-02-17 15:05:47340 } else {
341 delete *i;
[email protected]fb5bcc02012-02-17 14:05:42342 pattern_singletons_.erase(i++);
[email protected]2fb51d92012-02-17 15:05:47343 }
[email protected]fb5bcc02012-02-17 14:05:42344 }
345}
346
[email protected]357c4db2012-03-29 07:51:57347bool URLMatcherConditionFactory::IsEmpty() const {
348 return pattern_singletons_.empty();
349}
350
[email protected]fb5bcc02012-02-17 14:05:42351URLMatcherCondition URLMatcherConditionFactory::CreateCondition(
352 URLMatcherCondition::Criterion criterion,
353 const std::string& pattern) {
354 SubstringPattern search_pattern(pattern, 0);
355 PatternSingletons::const_iterator iter =
356 pattern_singletons_.find(&search_pattern);
357 if (iter != pattern_singletons_.end()) {
358 return URLMatcherCondition(criterion, *iter);
359 } else {
360 SubstringPattern* new_pattern =
361 new SubstringPattern(pattern, id_counter_++);
362 pattern_singletons_.insert(new_pattern);
363 return URLMatcherCondition(criterion, new_pattern);
364 }
365}
366
367std::string URLMatcherConditionFactory::CanonicalizeHostname(
368 const std::string& hostname) const {
369 if (!hostname.empty() && hostname[0] == '.')
370 return hostname;
371 else
372 return "." + hostname;
373}
374
375bool URLMatcherConditionFactory::SubstringPatternPointerCompare::operator()(
376 SubstringPattern* lhs,
377 SubstringPattern* rhs) const {
378 if (lhs == NULL && rhs != NULL) return true;
379 if (lhs != NULL && rhs != NULL)
380 return lhs->pattern() < rhs->pattern();
381 // Either both are NULL or only rhs is NULL.
382 return false;
383}
384
385//
[email protected]faceb0f2012-04-12 17:07:19386// URLMatcherSchemeFilter
387//
388
389URLMatcherSchemeFilter::URLMatcherSchemeFilter(const std::string& filter)
390 : filters_(1) {
391 filters_.push_back(filter);
392}
393
394URLMatcherSchemeFilter::URLMatcherSchemeFilter(
395 const std::vector<std::string>& filters)
396 : filters_(filters) {}
397
398URLMatcherSchemeFilter::~URLMatcherSchemeFilter() {}
399
400bool URLMatcherSchemeFilter::IsMatch(const GURL& url) const {
401 return std::find(filters_.begin(), filters_.end(), url.scheme()) !=
402 filters_.end();
403}
404
405//
[email protected]fb5bcc02012-02-17 14:05:42406// URLMatcherConditionSet
407//
408
[email protected]fb5bcc02012-02-17 14:05:42409URLMatcherConditionSet::~URLMatcherConditionSet() {}
410
411URLMatcherConditionSet::URLMatcherConditionSet(
412 ID id,
413 const Conditions& conditions)
414 : id_(id),
415 conditions_(conditions) {}
416
[email protected]faceb0f2012-04-12 17:07:19417URLMatcherConditionSet::URLMatcherConditionSet(
418 ID id,
419 const Conditions& conditions,
420 scoped_ptr<URLMatcherSchemeFilter> scheme_filter)
421 : id_(id),
422 conditions_(conditions),
423 scheme_filter_(scheme_filter.Pass()) {}
424
[email protected]fb5bcc02012-02-17 14:05:42425bool URLMatcherConditionSet::IsMatch(
426 const std::set<SubstringPattern::ID>& matching_substring_patterns,
427 const GURL& url) const {
428 for (Conditions::const_iterator i = conditions_.begin();
[email protected]3b001a02012-04-05 10:38:06429 i != conditions_.end(); ++i) {
[email protected]fb5bcc02012-02-17 14:05:42430 if (!i->IsMatch(matching_substring_patterns, url))
431 return false;
432 }
[email protected]faceb0f2012-04-12 17:07:19433 if (scheme_filter_.get() && !scheme_filter_->IsMatch(url))
434 return false;
[email protected]fb5bcc02012-02-17 14:05:42435 return true;
436}
437
438
439//
440// URLMatcher
441//
442
443URLMatcher::URLMatcher() {}
444
445URLMatcher::~URLMatcher() {}
446
447void URLMatcher::AddConditionSets(
[email protected]3b001a02012-04-05 10:38:06448 const URLMatcherConditionSet::Vector& condition_sets) {
449 for (URLMatcherConditionSet::Vector::const_iterator i =
450 condition_sets.begin(); i != condition_sets.end(); ++i) {
451 DCHECK(url_matcher_condition_sets_.find((*i)->id()) ==
452 url_matcher_condition_sets_.end());
453 url_matcher_condition_sets_[(*i)->id()] = *i;
[email protected]fb5bcc02012-02-17 14:05:42454 }
455 UpdateInternalDatastructures();
456}
457
458void URLMatcher::RemoveConditionSets(
459 const std::vector<URLMatcherConditionSet::ID>& condition_set_ids) {
460 for (std::vector<URLMatcherConditionSet::ID>::const_iterator i =
[email protected]3b001a02012-04-05 10:38:06461 condition_set_ids.begin(); i != condition_set_ids.end(); ++i) {
[email protected]fb5bcc02012-02-17 14:05:42462 DCHECK(url_matcher_condition_sets_.find(*i) !=
463 url_matcher_condition_sets_.end());
464 url_matcher_condition_sets_.erase(*i);
465 }
466 UpdateInternalDatastructures();
467}
468
[email protected]d552b432012-03-29 07:42:32469void URLMatcher::ClearUnusedConditionSets() {
470 UpdateConditionFactory();
471}
472
[email protected]fb5bcc02012-02-17 14:05:42473std::set<URLMatcherConditionSet::ID> URLMatcher::MatchURL(const GURL& url) {
474 // Find all IDs of SubstringPatterns that match |url|.
475 // See URLMatcherConditionFactory for the canonicalization of URLs and the
476 // distinction between full url searches and url component searches.
477 std::set<SubstringPattern::ID> matches;
478 full_url_matcher_.Match(
479 condition_factory_.CanonicalizeURLForFullSearches(url), &matches);
480 url_component_matcher_.Match(
481 condition_factory_.CanonicalizeURLForComponentSearches(url), &matches);
482
483 // Calculate all URLMatcherConditionSets for which all URLMatcherConditions
484 // were fulfilled.
485 std::set<URLMatcherConditionSet::ID> result;
486 for (std::set<SubstringPattern::ID>::const_iterator i = matches.begin();
[email protected]3b001a02012-04-05 10:38:06487 i != matches.end(); ++i) {
[email protected]fb5bcc02012-02-17 14:05:42488 // For each URLMatcherConditionSet there is exactly one condition
489 // registered in substring_match_triggers_. This means that the following
490 // logic tests each URLMatcherConditionSet exactly once if it can be
491 // completely fulfilled.
492 std::set<URLMatcherConditionSet::ID>& condition_sets =
493 substring_match_triggers_[*i];
494 for (std::set<URLMatcherConditionSet::ID>::const_iterator j =
[email protected]3b001a02012-04-05 10:38:06495 condition_sets.begin(); j != condition_sets.end(); ++j) {
496 if (url_matcher_condition_sets_[*j]->IsMatch(matches, url))
[email protected]fb5bcc02012-02-17 14:05:42497 result.insert(*j);
498 }
499 }
500
501 return result;
502}
503
[email protected]357c4db2012-03-29 07:51:57504bool URLMatcher::IsEmpty() const {
505 return condition_factory_.IsEmpty() &&
506 url_matcher_condition_sets_.empty() &&
507 substring_match_triggers_.empty() &&
508 full_url_matcher_.IsEmpty() &&
509 url_component_matcher_.IsEmpty() &&
510 registered_full_url_patterns_.empty() &&
511 registered_url_component_patterns_.empty();
512}
513
[email protected]fb5bcc02012-02-17 14:05:42514void URLMatcher::UpdateSubstringSetMatcher(bool full_url_conditions) {
515 // The purpose of |full_url_conditions| is just that we need to execute
516 // the same logic once for Full URL searches and once for URL Component
517 // searches (see URLMatcherConditionFactory).
518
519 // Determine which patterns need to be registered when this function
520 // terminates.
521 std::set<const SubstringPattern*> new_patterns;
522 for (URLMatcherConditionSets::const_iterator condition_set_iter =
523 url_matcher_condition_sets_.begin();
524 condition_set_iter != url_matcher_condition_sets_.end();
525 ++condition_set_iter) {
526 const URLMatcherConditionSet::Conditions& conditions =
[email protected]3b001a02012-04-05 10:38:06527 condition_set_iter->second->conditions();
[email protected]fb5bcc02012-02-17 14:05:42528 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
[email protected]3b001a02012-04-05 10:38:06529 conditions.begin(); condition_iter != conditions.end();
530 ++condition_iter) {
[email protected]fb5bcc02012-02-17 14:05:42531 // If we are called to process Full URL searches, ignore all others,
532 // and vice versa.
533 if (full_url_conditions == condition_iter->IsFullURLCondition())
534 new_patterns.insert(condition_iter->substring_pattern());
535 }
536 }
537
538 // This is the set of patterns that were registered before this function
539 // is called.
540 std::set<const SubstringPattern*>& registered_patterns =
541 full_url_conditions ? registered_full_url_patterns_
542 : registered_url_component_patterns_;
543
544 // Add all patterns that are in new_patterns but not in registered_patterns.
545 std::vector<const SubstringPattern*> patterns_to_register;
546 std::set_difference(
547 new_patterns.begin(), new_patterns.end(),
548 registered_patterns.begin(), registered_patterns.end(),
549 std::back_inserter(patterns_to_register));
550
551 // Remove all patterns that are in registered_patterns but not in
552 // new_patterns.
553 std::vector<const SubstringPattern*> patterns_to_unregister;
554 std::set_difference(
555 registered_patterns.begin(), registered_patterns.end(),
556 new_patterns.begin(), new_patterns.end(),
557 std::back_inserter(patterns_to_unregister));
558
559 // Update the SubstringSetMatcher.
560 SubstringSetMatcher& url_matcher =
561 full_url_conditions ? full_url_matcher_ : url_component_matcher_;
562 url_matcher.RegisterAndUnregisterPatterns(patterns_to_register,
563 patterns_to_unregister);
564
565 // Update the set of registered_patterns for the next time this function
566 // is being called.
567 registered_patterns.swap(new_patterns);
568}
569
570void URLMatcher::UpdateTriggers() {
571 // Count substring pattern frequencies.
572 std::map<SubstringPattern::ID, size_t> substring_pattern_frequencies;
573 for (URLMatcherConditionSets::const_iterator condition_set_iter =
574 url_matcher_condition_sets_.begin();
575 condition_set_iter != url_matcher_condition_sets_.end();
576 ++condition_set_iter) {
577 const URLMatcherConditionSet::Conditions& conditions =
[email protected]3b001a02012-04-05 10:38:06578 condition_set_iter->second->conditions();
[email protected]fb5bcc02012-02-17 14:05:42579 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
[email protected]3b001a02012-04-05 10:38:06580 conditions.begin(); condition_iter != conditions.end();
581 ++condition_iter) {
[email protected]fb5bcc02012-02-17 14:05:42582 const SubstringPattern* pattern = condition_iter->substring_pattern();
583 substring_pattern_frequencies[pattern->id()]++;
584 }
585 }
586
587 // Update trigger conditions: Determine for each URLMatcherConditionSet which
588 // URLMatcherCondition contains a SubstringPattern that occurs least
589 // frequently in this URLMatcher. We assume that this condition is very
590 // specific and occurs rarely in URLs. If a match occurs for this
591 // URLMatcherCondition, we want to test all other URLMatcherCondition in the
592 // respective URLMatcherConditionSet as well to see whether the entire
593 // URLMatcherConditionSet is considered matching.
594 substring_match_triggers_.clear();
595 for (URLMatcherConditionSets::const_iterator condition_set_iter =
596 url_matcher_condition_sets_.begin();
597 condition_set_iter != url_matcher_condition_sets_.end();
598 ++condition_set_iter) {
599 const URLMatcherConditionSet::Conditions& conditions =
[email protected]3b001a02012-04-05 10:38:06600 condition_set_iter->second->conditions();
[email protected]fb5bcc02012-02-17 14:05:42601 if (conditions.empty())
602 continue;
603 URLMatcherConditionSet::Conditions::const_iterator condition_iter =
604 conditions.begin();
605 SubstringPattern::ID trigger = condition_iter->substring_pattern()->id();
606 // We skip the first element in the following loop.
607 ++condition_iter;
608 for (; condition_iter != conditions.end(); ++condition_iter) {
609 SubstringPattern::ID current_id =
610 condition_iter->substring_pattern()->id();
611 if (substring_pattern_frequencies[trigger] >
612 substring_pattern_frequencies[current_id]) {
613 trigger = current_id;
614 }
615 }
[email protected]3b001a02012-04-05 10:38:06616 substring_match_triggers_[trigger].insert(condition_set_iter->second->id());
[email protected]fb5bcc02012-02-17 14:05:42617 }
618}
619
620void URLMatcher::UpdateConditionFactory() {
621 std::set<SubstringPattern::ID> used_patterns;
622 for (URLMatcherConditionSets::const_iterator condition_set_iter =
623 url_matcher_condition_sets_.begin();
624 condition_set_iter != url_matcher_condition_sets_.end();
625 ++condition_set_iter) {
626 const URLMatcherConditionSet::Conditions& conditions =
[email protected]3b001a02012-04-05 10:38:06627 condition_set_iter->second->conditions();
[email protected]fb5bcc02012-02-17 14:05:42628 for (URLMatcherConditionSet::Conditions::const_iterator condition_iter =
[email protected]3b001a02012-04-05 10:38:06629 conditions.begin(); condition_iter != conditions.end();
630 ++condition_iter) {
[email protected]fb5bcc02012-02-17 14:05:42631 used_patterns.insert(condition_iter->substring_pattern()->id());
632 }
633 }
634 condition_factory_.ForgetUnusedPatterns(used_patterns);
635}
636
637void URLMatcher::UpdateInternalDatastructures() {
638 UpdateSubstringSetMatcher(false);
639 UpdateSubstringSetMatcher(true);
640 UpdateTriggers();
641 UpdateConditionFactory();
642}
643
644} // namespace extensions