blob: f68015f50537143fe5786cf3fbbdcdd52a4fa319 [file] [log] [blame]
license.botbf09a502008-08-24 00:55:551// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit09911bf2008-07-26 23:55:294
5#ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
6#define CHROME_COMMON_VISITEDLINK_COMMON_H__
[email protected]32b76ef2010-07-26 23:08:247#pragma once
initial.commit09911bf2008-07-26 23:55:298
[email protected]3e90d4a2009-07-03 17:38:399#include <vector>
initial.commit09911bf2008-07-26 23:55:2910
11#include "base/basictypes.h"
[email protected]e13ad79b2010-07-22 21:36:5012
13class GURL;
initial.commit09911bf2008-07-26 23:55:2914
15// number of bytes in the salt
16#define LINK_SALT_LENGTH 8
17
18// A multiprocess-safe database of the visited links for the browser. There
19// should be exactly one process that has write access (implemented by
20// VisitedLinkMaster), while all other processes should be read-only
21// (implemented by VisitedLinkSlave). These other processes add links by calling
22// the writer process to add them for it. The writer may also notify the readers
23// to replace their table when the table is resized.
24//
25// IPC is not implemented in these classes. This is done through callback
26// functions supplied by the creator of these objects to allow more flexibility,
27// especially for testing.
28//
29// This class defines the common base for these others. We implement accessors
30// for looking things up in the hash table, and for computing hash values and
31// fingerprints. Both the master and the slave inherit from this, and add their
32// own code to set up and change these values as their design requires. The
33// slave pretty much just sets up the shared memory and saves the pointer. The
34// master does a lot of work to manage the table, reading and writing it to and
35// from disk, and resizing it when it gets too full.
36//
37// To ask whether a page is in history, we compute a 64-bit fingerprint of the
38// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
39// we consider it visited. Otherwise, it is unvisited. Note that it is possible
40// to get collisions, which is the penalty for not storing all URL strings in
41// memory (which could get to be more than we want to have in memory). We use
42// a salt value for the links on one computer so that an attacker can not
43// manually create a link that causes a collision.
44class VisitedLinkCommon {
45 public:
46 // A number that identifies the URL.
47 typedef uint64 Fingerprint;
[email protected]3e90d4a2009-07-03 17:38:3948 typedef std::vector<Fingerprint> Fingerprints;
initial.commit09911bf2008-07-26 23:55:2949
50 // A hash value of a fingerprint
51 typedef int32 Hash;
52
53 // A fingerprint or hash value that does not exist
54 static const Fingerprint null_fingerprint_;
55 static const Hash null_hash_;
56
57 VisitedLinkCommon();
58 virtual ~VisitedLinkCommon();
59
[email protected]8a6e1742008-12-13 17:18:5060 // Returns the fingerprint for the given URL.
61 Fingerprint ComputeURLFingerprint(const char* canonical_url,
62 size_t url_len) const {
63 return ComputeURLFingerprint(canonical_url, url_len, salt_);
64 }
65
66 // Looks up the given key in the table. The fingerprint for the URL is
67 // computed if you call one with the string argument. Returns true if found.
68 // Does not modify the hastable.
initial.commit09911bf2008-07-26 23:55:2969 bool IsVisited(const char* canonical_url, size_t url_len) const;
[email protected]e13ad79b2010-07-22 21:36:5070 bool IsVisited(const GURL& url) const;
[email protected]8a6e1742008-12-13 17:18:5071 bool IsVisited(Fingerprint fingerprint) const;
initial.commit09911bf2008-07-26 23:55:2972
73#ifdef UNIT_TEST
74 // Returns statistics about DB usage
75 void GetUsageStatistics(int32* table_size,
76 VisitedLinkCommon::Fingerprint** fingerprints) {
77 *table_size = table_length_;
78 *fingerprints = hash_table_;
79 }
80#endif
81
82 protected:
83 // This structure is at the beginning of the shared memory so that the slaves
84 // can get stats on the table
85 struct SharedHeader {
86 // see goes into table_length_
87 uint32 length;
88
89 // goes into salt_
90 uint8 salt[LINK_SALT_LENGTH];
91 };
92
93 // Returns the fingerprint at the given index into the URL table. This
[email protected]f09c7182009-03-10 12:54:0494 // function should be called instead of accessing the table directly to
95 // contain endian issues.
initial.commit09911bf2008-07-26 23:55:2996 Fingerprint FingerprintAt(int32 table_offset) const {
initial.commit09911bf2008-07-26 23:55:2997 if (!hash_table_)
[email protected]7fb6c862009-03-13 02:51:4998 return null_fingerprint_;
initial.commit09911bf2008-07-26 23:55:2999 return hash_table_[table_offset];
100 }
101
initial.commit09911bf2008-07-26 23:55:29102 // Computes the fingerprint of the given canonical URL. It is static so the
103 // same algorithm can be re-used by the table rebuilder, so you will have to
[email protected]8a6e1742008-12-13 17:18:50104 // pass the salt as a parameter. See the non-static version above if you
105 // want to use the current class' salt.
initial.commit09911bf2008-07-26 23:55:29106 static Fingerprint ComputeURLFingerprint(const char* canonical_url,
107 size_t url_len,
108 const uint8 salt[LINK_SALT_LENGTH]);
109
110 // Computes the hash value of the given fingerprint, this is used as a lookup
111 // into the hashtable.
112 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
[email protected]7fb6c862009-03-13 02:51:49113 if (table_length == 0)
114 return null_hash_;
initial.commit09911bf2008-07-26 23:55:29115 return static_cast<Hash>(fingerprint % table_length);
116 }
[email protected]f09c7182009-03-10 12:54:04117 // Uses the current hashtable.
118 Hash HashFingerprint(Fingerprint fingerprint) const {
initial.commit09911bf2008-07-26 23:55:29119 return HashFingerprint(fingerprint, table_length_);
120 }
121
122 // pointer to the first item
123 VisitedLinkCommon::Fingerprint* hash_table_;
124
125 // the number of items in the hash table
126 int32 table_length_;
127
128 // salt used for each URL when computing the fingerprint
129 uint8 salt_[LINK_SALT_LENGTH];
130
131 private:
[email protected]312ef322009-08-28 19:33:29132 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
initial.commit09911bf2008-07-26 23:55:29133};
134
[email protected]11f4857282009-11-13 19:56:17135#endif // CHROME_COMMON_VISITEDLINK_COMMON_H_