blob: 65f540b37ba42c31954c017e82ae12c2f00c6641 [file] [log] [blame]
license.botbf09a502008-08-24 00:55:551// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit09911bf2008-07-26 23:55:294
5#ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
6#define CHROME_COMMON_VISITEDLINK_COMMON_H__
7
8#include <string>
9
10#include "base/basictypes.h"
11#include "base/logging.h"
12#include "googleurl/src/gurl.h"
13
14// number of bytes in the salt
15#define LINK_SALT_LENGTH 8
16
17// A multiprocess-safe database of the visited links for the browser. There
18// should be exactly one process that has write access (implemented by
19// VisitedLinkMaster), while all other processes should be read-only
20// (implemented by VisitedLinkSlave). These other processes add links by calling
21// the writer process to add them for it. The writer may also notify the readers
22// to replace their table when the table is resized.
23//
24// IPC is not implemented in these classes. This is done through callback
25// functions supplied by the creator of these objects to allow more flexibility,
26// especially for testing.
27//
28// This class defines the common base for these others. We implement accessors
29// for looking things up in the hash table, and for computing hash values and
30// fingerprints. Both the master and the slave inherit from this, and add their
31// own code to set up and change these values as their design requires. The
32// slave pretty much just sets up the shared memory and saves the pointer. The
33// master does a lot of work to manage the table, reading and writing it to and
34// from disk, and resizing it when it gets too full.
35//
36// To ask whether a page is in history, we compute a 64-bit fingerprint of the
// URL. The fingerprint is hashed and looked up in the URL hashtable. If found,
// we consider it visited. Otherwise, it is unvisited. Note that it is possible
39// to get collisions, which is the penalty for not storing all URL strings in
40// memory (which could get to be more than we want to have in memory). We use
41// a salt value for the links on one computer so that an attacker can not
42// manually create a link that causes a collision.
43class VisitedLinkCommon {
44 public:
45 // A number that identifies the URL.
46 typedef uint64 Fingerprint;
47
48 // A hash value of a fingerprint
49 typedef int32 Hash;
50
51 // A fingerprint or hash value that does not exist
52 static const Fingerprint null_fingerprint_;
53 static const Hash null_hash_;
54
55 VisitedLinkCommon();
56 virtual ~VisitedLinkCommon();
57
58 // Computes the fingerprint of the key and looks it up in the table. We
59 // return true if found. Does not modify the hastable. The input should be
60 // the canonical 16-bit URL.
61 bool IsVisited(const char* canonical_url, size_t url_len) const;
62 bool IsVisited(const GURL& url) const {
63 return IsVisited(url.spec().data(), url.spec().size());
64 }
65
66#ifdef UNIT_TEST
67 // Returns statistics about DB usage
68 void GetUsageStatistics(int32* table_size,
69 VisitedLinkCommon::Fingerprint** fingerprints) {
70 *table_size = table_length_;
71 *fingerprints = hash_table_;
72 }
73#endif
74
75 protected:
76 // This structure is at the beginning of the shared memory so that the slaves
77 // can get stats on the table
78 struct SharedHeader {
79 // see goes into table_length_
80 uint32 length;
81
82 // goes into salt_
83 uint8 salt[LINK_SALT_LENGTH];
84 };
85
86 // Returns the fingerprint at the given index into the URL table. This
87 // function should be called instead of accessing the table directly to contain
88 // endian issues.
89 Fingerprint FingerprintAt(int32 table_offset) const {
90 DCHECK(hash_table_);
91 if (!hash_table_)
92 return 0;
93 return hash_table_[table_offset];
94 }
95
96 // Returns true if the given fingerprint is in the table.
97 bool IsVisited(Fingerprint fingerprint) const;
98
99 // Computes the fingerprint of the given canonical URL. It is static so the
100 // same algorithm can be re-used by the table rebuilder, so you will have to
101 // pass the salt as a parameter.
102 static Fingerprint ComputeURLFingerprint(const char* canonical_url,
103 size_t url_len,
104 const uint8 salt[LINK_SALT_LENGTH]);
105
106 // Computes the hash value of the given fingerprint, this is used as a lookup
107 // into the hashtable.
108 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
109 return static_cast<Hash>(fingerprint % table_length);
110 }
111 Hash HashFingerprint(Fingerprint fingerprint) const { // uses the current hashtable
112 return HashFingerprint(fingerprint, table_length_);
113 }
114
115 // pointer to the first item
116 VisitedLinkCommon::Fingerprint* hash_table_;
117
118 // the number of items in the hash table
119 int32 table_length_;
120
121 // salt used for each URL when computing the fingerprint
122 uint8 salt_[LINK_SALT_LENGTH];
123
124 private:
125 DISALLOW_EVIL_CONSTRUCTORS(VisitedLinkCommon);
126};
127
#endif  // CHROME_COMMON_VISITEDLINK_COMMON_H__
license.botbf09a502008-08-24 00:55:55129