license.bot | bf09a50 | 2008-08-24 00:55:55 | [diff] [blame] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 4 | |
| 5 | #ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__ |
| 6 | #define CHROME_COMMON_VISITEDLINK_COMMON_H__ |
[email protected] | 32b76ef | 2010-07-26 23:08:24 | [diff] [blame] | 7 | #pragma once |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 8 | |
[email protected] | 3e90d4a | 2009-07-03 17:38:39 | [diff] [blame] | 9 | #include <vector> |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 10 | |
| 11 | #include "base/basictypes.h" |
[email protected] | e13ad79b | 2010-07-22 21:36:50 | [diff] [blame] | 12 | |
| 13 | class GURL; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 14 | |
| 15 | // number of bytes in the salt |
| 16 | #define LINK_SALT_LENGTH 8 |
| 17 | |
| 18 | // A multiprocess-safe database of the visited links for the browser. There |
| 19 | // should be exactly one process that has write access (implemented by |
| 20 | // VisitedLinkMaster), while all other processes should be read-only |
| 21 | // (implemented by VisitedLinkSlave). These other processes add links by calling |
| 22 | // the writer process to add them for it. The writer may also notify the readers |
| 23 | // to replace their table when the table is resized. |
| 24 | // |
| 25 | // IPC is not implemented in these classes. This is done through callback |
| 26 | // functions supplied by the creator of these objects to allow more flexibility, |
| 27 | // especially for testing. |
| 28 | // |
| 29 | // This class defines the common base for these others. We implement accessors |
| 30 | // for looking things up in the hash table, and for computing hash values and |
| 31 | // fingerprints. Both the master and the slave inherit from this, and add their |
| 32 | // own code to set up and change these values as their design requires. The |
| 33 | // slave pretty much just sets up the shared memory and saves the pointer. The |
| 34 | // master does a lot of work to manage the table, reading and writing it to and |
| 35 | // from disk, and resizing it when it gets too full. |
| 36 | // |
| 37 | // To ask whether a page is in history, we compute a 64-bit fingerprint of the |
| 38 | // URL. The fingerprint is hashed and looked up in the URL hashtable. If it is, |
| 39 | // we consider it visited. Otherwise, it is unvisited. Note that it is possible |
| 40 | // to get collisions, which is the penalty for not storing all URL strings in |
| 41 | // memory (which could get to be more than we want to have in memory). We use |
| 42 | // a salt value for the links on one computer so that an attacker can not |
| 43 | // manually create a link that causes a collision. |
| 44 | class VisitedLinkCommon { |
| 45 | public: |
| 46 | // A number that identifies the URL. |
| 47 | typedef uint64 Fingerprint; |
[email protected] | 3e90d4a | 2009-07-03 17:38:39 | [diff] [blame] | 48 | typedef std::vector<Fingerprint> Fingerprints; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 49 | |
| 50 | // A hash value of a fingerprint |
| 51 | typedef int32 Hash; |
| 52 | |
| 53 | // A fingerprint or hash value that does not exist |
| 54 | static const Fingerprint null_fingerprint_; |
| 55 | static const Hash null_hash_; |
| 56 | |
| 57 | VisitedLinkCommon(); |
| 58 | virtual ~VisitedLinkCommon(); |
| 59 | |
[email protected] | 8a6e174 | 2008-12-13 17:18:50 | [diff] [blame] | 60 | // Returns the fingerprint for the given URL. |
| 61 | Fingerprint ComputeURLFingerprint(const char* canonical_url, |
| 62 | size_t url_len) const { |
| 63 | return ComputeURLFingerprint(canonical_url, url_len, salt_); |
| 64 | } |
| 65 | |
| 66 | // Looks up the given key in the table. The fingerprint for the URL is |
| 67 | // computed if you call one with the string argument. Returns true if found. |
| 68 | // Does not modify the hastable. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 69 | bool IsVisited(const char* canonical_url, size_t url_len) const; |
[email protected] | e13ad79b | 2010-07-22 21:36:50 | [diff] [blame] | 70 | bool IsVisited(const GURL& url) const; |
[email protected] | 8a6e174 | 2008-12-13 17:18:50 | [diff] [blame] | 71 | bool IsVisited(Fingerprint fingerprint) const; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 72 | |
| 73 | #ifdef UNIT_TEST |
| 74 | // Returns statistics about DB usage |
| 75 | void GetUsageStatistics(int32* table_size, |
| 76 | VisitedLinkCommon::Fingerprint** fingerprints) { |
| 77 | *table_size = table_length_; |
| 78 | *fingerprints = hash_table_; |
| 79 | } |
| 80 | #endif |
| 81 | |
| 82 | protected: |
| 83 | // This structure is at the beginning of the shared memory so that the slaves |
| 84 | // can get stats on the table |
| 85 | struct SharedHeader { |
| 86 | // see goes into table_length_ |
| 87 | uint32 length; |
| 88 | |
| 89 | // goes into salt_ |
| 90 | uint8 salt[LINK_SALT_LENGTH]; |
| 91 | }; |
| 92 | |
| 93 | // Returns the fingerprint at the given index into the URL table. This |
[email protected] | f09c718 | 2009-03-10 12:54:04 | [diff] [blame] | 94 | // function should be called instead of accessing the table directly to |
| 95 | // contain endian issues. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 96 | Fingerprint FingerprintAt(int32 table_offset) const { |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 97 | if (!hash_table_) |
[email protected] | 7fb6c86 | 2009-03-13 02:51:49 | [diff] [blame] | 98 | return null_fingerprint_; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 99 | return hash_table_[table_offset]; |
| 100 | } |
| 101 | |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 102 | // Computes the fingerprint of the given canonical URL. It is static so the |
| 103 | // same algorithm can be re-used by the table rebuilder, so you will have to |
[email protected] | 8a6e174 | 2008-12-13 17:18:50 | [diff] [blame] | 104 | // pass the salt as a parameter. See the non-static version above if you |
| 105 | // want to use the current class' salt. |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 106 | static Fingerprint ComputeURLFingerprint(const char* canonical_url, |
| 107 | size_t url_len, |
| 108 | const uint8 salt[LINK_SALT_LENGTH]); |
| 109 | |
| 110 | // Computes the hash value of the given fingerprint, this is used as a lookup |
| 111 | // into the hashtable. |
| 112 | static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { |
[email protected] | 7fb6c86 | 2009-03-13 02:51:49 | [diff] [blame] | 113 | if (table_length == 0) |
| 114 | return null_hash_; |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 115 | return static_cast<Hash>(fingerprint % table_length); |
| 116 | } |
[email protected] | f09c718 | 2009-03-10 12:54:04 | [diff] [blame] | 117 | // Uses the current hashtable. |
| 118 | Hash HashFingerprint(Fingerprint fingerprint) const { |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 119 | return HashFingerprint(fingerprint, table_length_); |
| 120 | } |
| 121 | |
| 122 | // pointer to the first item |
| 123 | VisitedLinkCommon::Fingerprint* hash_table_; |
| 124 | |
| 125 | // the number of items in the hash table |
| 126 | int32 table_length_; |
| 127 | |
| 128 | // salt used for each URL when computing the fingerprint |
| 129 | uint8 salt_[LINK_SALT_LENGTH]; |
| 130 | |
| 131 | private: |
[email protected] | 312ef32 | 2009-08-28 19:33:29 | [diff] [blame] | 132 | DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); |
initial.commit | 09911bf | 2008-07-26 23:55:29 | [diff] [blame] | 133 | }; |
| 134 | |
[email protected] | 11f485728 | 2009-11-13 19:56:17 | [diff] [blame] | 135 | #endif // CHROME_COMMON_VISITEDLINK_COMMON_H__ |