summaryrefslogtreecommitdiffstats
path: root/chrome/browser/visitedlink_master.cc
blob: 5d48d63e8887db68585daad9a6992886abb8f992 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/visitedlink_master.h"

#if defined(OS_WIN)
#include <windows.h>
#include <io.h>
#include <shlobj.h>
#endif  // defined(OS_WIN)
#include <stdio.h>

#include <algorithm>

#include "base/file_util.h"
#include "base/logging.h"
#include "base/message_loop.h"
#include "base/path_service.h"
#include "base/process_util.h"
#include "base/rand_util.h"
#include "base/stack_container.h"
#include "base/string_util.h"
#include "base/thread.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/history/history.h"
#include "chrome/browser/profile.h"
#if defined(OS_WIN)
#include "chrome/common/win_util.h"
#endif

using file_util::ScopedFILE;
using file_util::OpenFile;
using file_util::TruncateFile;

const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;

const int32 VisitedLinkMaster::kFileCurrentVersion = 2;

// the signature at the beginning of the URL table = "VLnk" (visited links)
const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
const size_t VisitedLinkMaster::kFileHeaderSize =
    kFileHeaderSaltOffset + LINK_SALT_LENGTH;

// This value should also be the same as the smallest size in the lookup
// table in NewTableSizeForCount (prime number).
const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;

const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;

namespace {

// Fills the given salt structure with some quasi-random values
// It is not necessary to generate a cryptographically strong random string,
// only that it be reasonably different for different users.
void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
  DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
  uint64 randval = base::RandUint64();
  memcpy(salt, &randval, 8);
}

// AsyncWriter ----------------------------------------------------------------

// This task executes on a background thread and executes a write. This
// prevents us from blocking the UI thread doing I/O.
class AsyncWriter : public Task {
 public:
  AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len)
      : file_(file),
        offset_(offset) {
    data_->resize(data_len);
    memcpy(&*data_->begin(), data, data_len);
  }

  virtual void Run() {
    WriteToFile(file_, offset_,
                &*data_->begin(), static_cast<int32>(data_->size()));
  }

  // Exposed as a static so it can be called directly from the Master to
  // reduce the number of platform-specific I/O sites we have. Returns true if
  // the write was complete.
  static bool WriteToFile(FILE* file,
                          off_t offset,
                          const void* data,
                          size_t data_len) {
    if (fseek(file, offset, SEEK_SET) != 0)
      return false;  // Don't write to an invalid part of the file.

    size_t num_written = fwrite(data, 1, data_len, file);

    // The write may not make it to the kernel (stdlib may buffer the write)
    // until the next fseek/fclose call.  If we crash, it's easy for our used
    // item count to be out of sync with the number of hashes we write. 
    // Protect against this by calling fflush.
    int ret = fflush(file);
    DCHECK_EQ(0, ret);
    return num_written == data_len;
  }

 private:
  // The data to write and where to write it.
  FILE* file_;
  int32 offset_;  // Offset from the beginning of the file.

  // Most writes are just a single fingerprint, so we reserve that much in this
  // object to avoid mallocs in that case.
  StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;

  DISALLOW_COPY_AND_ASSIGN(AsyncWriter);
};

// Used to asynchronously set the end of the file. This must be done on the
// same thread as the writing to keep things synchronized.
class AsyncSetEndOfFile : public Task {
 public:
  explicit AsyncSetEndOfFile(FILE* file) : file_(file) {}

  virtual void Run() {
    TruncateFile(file_);
  }

 private:
  FILE* file_;
  DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile);
};

// Used to asynchronously close a file. This must be done on the same thread as
// the writing to keep things synchronized.
class AsyncCloseHandle : public Task {
 public:
  explicit AsyncCloseHandle(FILE* file) : file_(file) {}

  virtual void Run() {
    fclose(file_);
  }

 private:
  FILE* file_;
  DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle);
};

}  // namespace

// TableBuilder ---------------------------------------------------------------

// How rebuilding from history works
// ---------------------------------
//
// We mark that we're rebuilding from history by setting the table_builder_
// member in VisitedLinkMaster to the TableBuilder we create. This builder
// will be called on the history thread by the history system for every URL
// in the database.
//
// The builder will store the fingerprints for those URLs, and then marshalls
// back to the main thread where the VisitedLinkMaster will be notified. The
// master then replaces its table with a new table containing the computed
// fingerprints.
//
// The builder must remain active while the history system is using it.
// Sometimes, the master will be deleted before the rebuild is complete, in
// which case it notifies the builder via DisownMaster(). The builder will
// delete itself once rebuilding is complete, and not execute any callback.
class VisitedLinkMaster::TableBuilder : public HistoryService::URLEnumerator,
                                        public base::RefCounted<TableBuilder> {
 public:
  TableBuilder(VisitedLinkMaster* master,
               const uint8 salt[LINK_SALT_LENGTH]);

  // Called on the main thread when the master is being destroyed. This will
  // prevent a crash when the query completes and the master is no longer
  // around. We can not actually do anything but mark this fact, since the
  // table will be being rebuilt simultaneously on the other thread.
  void DisownMaster();

  // HistoryService::URLEnumerator
  virtual void OnURL(const GURL& url);
  virtual void OnComplete(bool succeed);

 private:
  // OnComplete mashals to this function on the main thread to do the
  // notification.
  void OnCompleteMainThread();

  // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
  VisitedLinkMaster* master_;

  // The thread the visited link master is on where we will notify it.
  MessageLoop* main_message_loop_;

  // Indicates whether the operation has failed or not.
  bool success_;

  // Salt for this new table.
  uint8 salt_[LINK_SALT_LENGTH];

  // Stores the fingerprints we computed on the background thread.
  std::vector<VisitedLinkMaster::Fingerprint> fingerprints_;
};

// VisitedLinkMaster ----------------------------------------------------------

VisitedLinkMaster::VisitedLinkMaster(base::Thread* file_thread,
                                     PostNewTableEvent* poster,
                                     Profile* profile) {
  InitMembers(file_thread, poster, profile);
}

VisitedLinkMaster::VisitedLinkMaster(base::Thread* file_thread,
                                     PostNewTableEvent* poster,
                                     HistoryService* history_service,
                                     bool suppress_rebuild,
                                     const FilePath& filename,
                                     int32 default_table_size) {
  InitMembers(file_thread, poster, NULL);

  database_name_override_ = filename;
  table_size_override_ = default_table_size;
  history_service_override_ = history_service;
  suppress_rebuild_ = suppress_rebuild;
}

VisitedLinkMaster::~VisitedLinkMaster() {
  if (table_builder_.get()) {
    // Prevent the table builder from calling us back now that we're being
    // destroyed. Note that we DON'T delete the object, since the history
    // system is still writing into it. When that is complete, the table
    // builder will destroy itself when it finds we are gone.
    table_builder_->DisownMaster();
  }
  FreeURLTable();
}

void VisitedLinkMaster::InitMembers(base::Thread* file_thread,
                                    PostNewTableEvent* poster,
                                    Profile* profile) {
  if (file_thread)
    file_thread_ = file_thread->message_loop();
  else
    file_thread_ = NULL;

  post_new_table_event_ = poster;
  file_ = NULL;
  shared_memory_ = NULL;
  shared_memory_serial_ = 0;
  used_items_ = 0;
  table_size_override_ = 0;
  history_service_override_ = NULL;
  suppress_rebuild_ = false;
  profile_ = profile;

#ifndef NDEBUG
  posted_asynchronous_operation_ = false;
#endif
}

// The shared memory name should be unique on the system and also needs to
// change when we create a new table. The scheme we use includes the process
// ID, an increasing serial number, and the profile ID.
std::wstring VisitedLinkMaster::GetSharedMemoryName() const {
  // When unit testing, there's no profile, so use an empty ID string.
  std::wstring profile_id;
  if (profile_)
    profile_id = profile_->GetID().c_str();

  return StringPrintf(L"GVisitedLinks_%lu_%lu_%ls",
                      base::GetCurrentProcId(), shared_memory_serial_,
                      profile_id.c_str());
}

bool VisitedLinkMaster::Init() {
  if (!InitFromFile())
    return InitFromScratch(suppress_rebuild_);
  return true;
}

bool VisitedLinkMaster::ShareToProcess(base::ProcessHandle process,
                                       base::SharedMemoryHandle *new_handle) {
  if (shared_memory_)
    return shared_memory_->ShareToProcess(process, new_handle);

  NOTREACHED();
  return false;
}

base::SharedMemoryHandle VisitedLinkMaster::GetSharedMemoryHandle() {
  return shared_memory_->handle();
}

VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
  // Extra check that we are not off the record. This should not happen.
  if (profile_ && profile_->IsOffTheRecord()) {
    NOTREACHED();
    return null_hash_;
  }

  if (!url.is_valid())
    return null_hash_;  // Don't add invalid URLs.

  Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
                                                  url.spec().size(),
                                                  salt_);
  if (table_builder_) {
    // If we have a pending delete for this fingerprint, cancel it.
    std::set<Fingerprint>::iterator found =
        deleted_since_rebuild_.find(fingerprint);
    if (found != deleted_since_rebuild_.end())
        deleted_since_rebuild_.erase(found);

    // A rebuild is in progress, save this addition in the temporary list so
    // it can be added once rebuild is complete.
    added_since_rebuild_.insert(fingerprint);
  }

  // If the table is "full", we don't add URLs and just drop them on the floor.
  // This can happen if we get thousands of new URLs and something causes
  // the table resizing to fail. This check prevents a hang in that case. Note
  // that this is *not* the resize limit, this is just a sanity check.
  if (used_items_ / 8 > table_length_ / 10)
    return null_hash_;  // Table is more than 80% full.

  return AddFingerprint(fingerprint);
}

void VisitedLinkMaster::AddURL(const GURL& url) {
  Hash index = TryToAddURL(url);
  if (!table_builder_ && index != null_hash_) {
    // Not rebuilding, so we want to keep the file on disk up-to-date.
    WriteUsedItemCountToFile();
    WriteHashRangeToFile(index, index);
    ResizeTableIfNecessary();
  }
}

void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
  for (std::vector<GURL>::const_iterator i = url.begin();
       i != url.end(); ++i) {
    Hash index = TryToAddURL(*i);
    if (!table_builder_ && index != null_hash_)
      ResizeTableIfNecessary();
  }

  // Keeps the file on disk up-to-date.
  if (!table_builder_)
    WriteFullTable();
}

void VisitedLinkMaster::DeleteAllURLs() {
  // Any pending modifications are invalid.
  added_since_rebuild_.clear();
  deleted_since_rebuild_.clear();

  // Clear the hash table.
  used_items_ = 0;
  memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));

  // Resize it if it is now too empty. Resize may write the new table out for
  // us, otherwise, schedule writing the new table to disk ourselves.
  if (!ResizeTableIfNecessary())
    WriteFullTable();
}

void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
  typedef std::set<GURL>::const_iterator SetIterator;

  if (urls.empty())
    return;

  if (table_builder_) {
    // A rebuild is in progress, save this deletion in the temporary list so
    // it can be added once rebuild is complete.
    for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
      if (!i->is_valid())
        continue;

      Fingerprint fingerprint =
          ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
      deleted_since_rebuild_.insert(fingerprint);

      // If the URL was just added and now we're deleting it, it may be in the
      // list of things added since the last rebuild. Delete it from that list.
      std::set<Fingerprint>::iterator found =
          added_since_rebuild_.find(fingerprint);
      if (found != added_since_rebuild_.end())
        added_since_rebuild_.erase(found);

      // Delete the URLs from the in-memory table, but don't bother writing
      // to disk since it will be replaced soon.
      DeleteFingerprint(fingerprint, false);
    }
    return;
  }

  // Compute the deleted URLs' fingerprints and delete them
  std::set<Fingerprint> deleted_fingerprints;
  for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
    if (!i->is_valid())
      continue;
    deleted_fingerprints.insert(
        ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
  }
  DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
}

// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
    Fingerprint fingerprint) {
  if (!hash_table_ || table_length_ == 0) {
    NOTREACHED();  // Not initialized.
    return null_hash_;
  }

  Hash cur_hash = HashFingerprint(fingerprint);
  Hash first_hash = cur_hash;
  while (true) {
    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    if (cur_fingerprint == fingerprint)
      return null_hash_;  // This fingerprint is already in there, do nothing.

    if (cur_fingerprint == null_fingerprint_) {
      // End of probe sequence found, insert here.
      hash_table_[cur_hash] = fingerprint;
      used_items_++;
      return cur_hash;
    }

    // Advance in the probe sequence.
    cur_hash = IncrementHash(cur_hash);
    if (cur_hash == first_hash) {
      // This means that we've wrapped around and are about to go into an
      // infinite loop. Something was wrong with the hashtable resizing
      // logic, so stop here.
      NOTREACHED();
      return null_hash_;
    }
  }
}

void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
    const std::set<Fingerprint>& fingerprints) {
  bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);

  // Delete the URLs from the table.
  for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
       i != fingerprints.end(); ++i)
    DeleteFingerprint(*i, !bulk_write);

  // These deleted fingerprints may make us shrink the table.
  if (ResizeTableIfNecessary())
    return;  // The resize function wrote the new table to disk for us.

  // Nobody wrote this out for us, write the full file to disk.
  if (bulk_write)
    WriteFullTable();
}

bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
                                          bool update_file) {
  if (!hash_table_ || table_length_ == 0) {
    NOTREACHED();  // Not initialized.
    return false;
  }
  if (!IsVisited(fingerprint))
    return false;  // Not in the database to delete.

  // First update the header used count.
  used_items_--;
  if (update_file)
    WriteUsedItemCountToFile();

  Hash deleted_hash = HashFingerprint(fingerprint);

  // Find the range of "stuff" in the hash table that is adjacent to this
  // fingerprint. These are things that could be affected by the change in
  // the hash table. Since we use linear probing, anything after the deleted
  // item up until an empty item could be affected.
  Hash end_range = deleted_hash;
  while (true) {
    Hash next_hash = IncrementHash(end_range);
    if (next_hash == deleted_hash)
      break;  // We wrapped around and the whole table is full.
    if (!hash_table_[next_hash])
      break;  // Found the last spot.
    end_range = next_hash;
  }

  // We could get all fancy and move the affected fingerprints around, but
  // instead we just remove them all and re-add them (minus our deleted one).
  // This will mean there's a small window of time where the affected links
  // won't be marked visited.
  StackVector<Fingerprint, 32> shuffled_fingerprints;
  Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
  for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
    if (hash_table_[i] != fingerprint) {
      // Don't save the one we're deleting!
      shuffled_fingerprints->push_back(hash_table_[i]);

      // This will balance the increment of this value in AddFingerprint below
      // so there is no net change.
      used_items_--;
    }
    hash_table_[i] = null_fingerprint_;
  }

  if (!shuffled_fingerprints->empty()) {
    // Need to add the new items back.
    for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
      AddFingerprint(shuffled_fingerprints[i]);
  }

  // Write the affected range to disk [deleted_hash, end_range].
  if (update_file)
    WriteHashRangeToFile(deleted_hash, end_range);

  return true;
}

bool VisitedLinkMaster::WriteFullTable() {
  // This function can get called when the file is open, for example, when we
  // resize the table. We must handle this case and not try to reopen the file,
  // since there may be write operations pending on the file I/O thread.
  //
  // Note that once we start writing, we do not delete on error. This means
  // there can be a partial file, but the short file will be detected next time
  // we start, and will be replaced.
  //
  // This might possibly get corrupted if we crash in the middle of writing.
  // We should pick up the most common types of these failures when we notice
  // that the file size is different when we load it back in, and then we will
  // regenerate the table.
  if (!file_) {
    FilePath filename;
    GetDatabaseFileName(&filename);
    file_ = OpenFile(filename, "wb+");
    if (!file_) {
      DLOG(ERROR) << "Failed to open file " << filename.value();
      return false;
    }
  }

  // Write the new header.
  int32 header[4];
  header[0] = kFileSignature;
  header[1] = kFileCurrentVersion;
  header[2] = table_length_;
  header[3] = used_items_;
  WriteToFile(file_, 0, header, sizeof(header));
  WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);

  // Write the hash data.
  WriteToFile(file_, kFileHeaderSize,
              hash_table_, table_length_ * sizeof(Fingerprint));

  // The hash table may have shrunk, so make sure this is the end.
  if (file_thread_) {
    AsyncSetEndOfFile* setter = new AsyncSetEndOfFile(file_);
    file_thread_->PostTask(FROM_HERE, setter);
  } else {
    TruncateFile(file_);
  }

  return true;
}

bool VisitedLinkMaster::InitFromFile() {
  DCHECK(file_ == NULL);

  FilePath filename;
  GetDatabaseFileName(&filename);
  ScopedFILE file_closer(OpenFile(filename, "rb+"));
  if (!file_closer.get())
    return false;

  int32 num_entries, used_count;
  if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
    return false;  // Header isn't valid.

  // Allocate and read the table.
  if (!CreateURLTable(num_entries, false))
    return false;
  if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
                    hash_table_, num_entries * sizeof(Fingerprint))) {
    FreeURLTable();
    return false;
  }
  used_items_ = used_count;

#ifndef NDEBUG
  DebugValidate();
#endif

  file_ = file_closer.release();
  return true;
}

bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
  int32 table_size = kDefaultTableSize;
  if (table_size_override_)
    table_size = table_size_override_;

  // The salt must be generated before the table so that it can be copied to
  // the shared memory.
  GenerateSalt(salt_);
  if (!CreateURLTable(table_size, true))
    return false;

#ifndef NDEBUG
  DebugValidate();
#endif

  if (suppress_rebuild) {
    // When we disallow rebuilds (normally just unit tests), just use the
    // current empty table.
    return WriteFullTable();
  }

  // This will build the table from history. On the first run, history will
  // be empty, so this will be correct. This will also write the new table
  // to disk. We don't want to save explicitly here, since the rebuild may
  // not complete, leaving us with an empty but valid visited link database.
  // In the future, we won't know we need to try rebuilding again.
  return RebuildTableFromHistory();
}

bool VisitedLinkMaster::ReadFileHeader(FILE* file,
                                       int32* num_entries,
                                       int32* used_count,
                                       uint8 salt[LINK_SALT_LENGTH]) {
  // Get file size.
  // Note that there is no need to seek back to the original location in the
  // file since ReadFromFile() [which is the next call accessing the file]
  // seeks before reading.
  if (fseek(file, 0, SEEK_END) == -1)
    return false;
  size_t file_size = ftell(file);

  if (file_size <= kFileHeaderSize)
    return false;

  uint8 header[kFileHeaderSize];
  if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
    return false;

  // Verify the signature.
  int32 signature;
  memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
  if (signature != kFileSignature)
    return false;

  // Verify the version is up-to-date. As with other read errors, a version
  // mistmatch will trigger a rebuild of the database from history, which will
  // have the effect of migrating the database.
  int32 version;
  memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
  if (version != kFileCurrentVersion)
    return false;  // Bad version.

  // Read the table size and make sure it matches the file size.
  memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
  if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
    return false;  // Bad size.

  // Read the used item count.
  memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
  if (*used_count > *num_entries)
    return false;  // Bad used item count;

  // Read the salt.
  memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);

  // This file looks OK from the header's perspective.
  return true;
}

bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) {
  if (!database_name_override_.empty()) {
    // use this filename, the directory must exist
    *filename = database_name_override_;
    return true;
  }

  if (!profile_ || profile_->GetPath().empty())
    return false;

  FilePath profile_dir = profile_->GetPath();
  *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
  return true;
}

// Initializes the shared memory structure. The salt should already be filled
// in so that it can be written to the shared memory
bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
  // The table is the size of the table followed by the entries.
  int32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);

  // Create the shared memory object.
  shared_memory_ = new base::SharedMemory();
  if (!shared_memory_)
    return false;

  if (!shared_memory_->Create(GetSharedMemoryName().c_str(),
      false, false, alloc_size))
    return false;

  // Map into our process.
  if (!shared_memory_->Map(alloc_size)) {
    delete shared_memory_;
    shared_memory_ = NULL;
    return false;
  }

  if (init_to_empty) {
    memset(shared_memory_->memory(), 0, alloc_size);
    used_items_ = 0;
  }
  table_length_ = num_entries;

  // Save the header for other processes to read.
  SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
  header->length = table_length_;
  memcpy(header->salt, salt_, LINK_SALT_LENGTH);

  // Our table pointer is just the data immediately following the size.
  hash_table_ = reinterpret_cast<Fingerprint*>(
      static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));

  return true;
}

bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
  base::SharedMemory *old_shared_memory = shared_memory_;
  Fingerprint* old_hash_table = hash_table_;
  int32 old_table_length = table_length_;
  if (!CreateURLTable(num_entries, true)) {
    // Try to put back the old state.
    shared_memory_ = old_shared_memory;
    hash_table_ = old_hash_table;
    table_length_ = old_table_length;
    return false;
  }

#ifndef NDEBUG
  DebugValidate();
#endif

  return true;
}

void VisitedLinkMaster::FreeURLTable() {
  if (shared_memory_) {
    delete shared_memory_;
    shared_memory_ = NULL;
  }
  if (file_) {
    if (file_thread_) {
      AsyncCloseHandle* closer = new AsyncCloseHandle(file_);
      file_thread_->PostTask(FROM_HERE, closer);
    } else {
      fclose(file_);
    }
  }
}

bool VisitedLinkMaster::ResizeTableIfNecessary() {
  DCHECK(table_length_ > 0) << "Must have a table";

  // Load limits for good performance/space. We are pretty conservative about
  // keeping the table not very full. This is because we use linear probing
  // which increases the likelihood of clumps of entries which will reduce
  // performance.
  const float max_table_load = 0.5f;  // Grow when we're > this full.
  const float min_table_load = 0.2f;  // Shrink when we're < this full.

  float load = ComputeTableLoad();
  if (load < max_table_load &&
      (table_length_ <= static_cast<float>(kDefaultTableSize) ||
       load > min_table_load))
    return false;

  // Table needs to grow or shrink.
  int new_size = NewTableSizeForCount(used_items_);
  DCHECK(new_size > used_items_);
  DCHECK(load <= min_table_load || new_size > table_length_);
  ResizeTable(new_size);
  return true;
}

void VisitedLinkMaster::ResizeTable(int32 new_size) {
  DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
  shared_memory_serial_++;

#ifndef NDEBUG
  DebugValidate();
#endif

  base::SharedMemory* old_shared_memory = shared_memory_;
  Fingerprint* old_hash_table = hash_table_;
  int32 old_table_length = table_length_;
  if (!BeginReplaceURLTable(new_size))
    return;

  // Now we have two tables, our local copy which is the old one, and the new
  // one loaded into this object where we need to copy the data.
  for (int32 i = 0; i < old_table_length; i++) {
    Fingerprint cur = old_hash_table[i];
    if (cur)
      AddFingerprint(cur);
  }

  // On error unmapping, just forget about it since we can't do anything
  // else to release it.
  delete old_shared_memory;

  // Send an update notification to all child processes so they read the new
  // table.
  post_new_table_event_(shared_memory_);

#ifndef NDEBUG
  DebugValidate();
#endif

  // The new table needs to be written to disk.
  WriteFullTable();
}

uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
  // These table sizes are selected to be the maximum prime number less than
  // a "convenient" multiple of 1K.
  static const int table_sizes[] = {
      16381,    // 16K  = 16384   <- don't shrink below this table size
                //                   (should be == default_table_size)
      32767,    // 32K  = 32768
      65521,    // 64K  = 65536
      130051,   // 128K = 131072
      262127,   // 256K = 262144
      524269,   // 512K = 524288
      1048549,  // 1M   = 1048576
      2097143,  // 2M   = 2097152
      4194301,  // 4M   = 4194304
      8388571,  // 8M   = 8388608
      16777199,  // 16M  = 16777216
      33554347};  // 32M  = 33554432

  // Try to leave the table 33% full.
  int desired = item_count * 3;

  // Find the closest prime.
  for (size_t i = 0; i < arraysize(table_sizes); i ++) {
    if (table_sizes[i] > desired)
      return table_sizes[i];
  }

  // Growing very big, just approximate a "good" number, not growing as much
  // as normal.
  return item_count * 2 - 1;
}

// See the TableBuilder definition in the header file for how this works.
bool VisitedLinkMaster::RebuildTableFromHistory() {
  DCHECK(!table_builder_);
  if (table_builder_)
    return false;

  HistoryService* history_service = history_service_override_;
  if (!history_service && profile_) {
    history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
  }

  if (!history_service) {
    DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
                     "obtain a HistoryService.";
    return false;
  }

  // TODO(brettw) make sure we have reasonable salt!
  table_builder_ = new TableBuilder(this, salt_);

  // Make sure the table builder stays live during the call, even if the
  // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
  table_builder_->AddRef();
  history_service->IterateURLs(table_builder_);
  return true;
}

// See the TableBuilder definition in the header file for how this works.
void VisitedLinkMaster::OnTableRebuildComplete(
    bool success,
    const std::vector<Fingerprint>& fingerprints) {
  if (success) {
    // Replace the old table with a new blank one.
    shared_memory_serial_++;

    // We are responsible for freeing it AFTER it has been replaced if
    // replacement succeeds.
    base::SharedMemory* old_shared_memory = shared_memory_;

    int new_table_size = NewTableSizeForCount(
        static_cast<int>(fingerprints.size()));
    if (BeginReplaceURLTable(new_table_size)) {
      // Free the old table.
      delete old_shared_memory;

      // Add the stored fingerprints to the hash table.
      for (size_t i = 0; i < fingerprints.size(); i++)
        AddFingerprint(fingerprints[i]);

      // Also add anything that was added while we were asynchronously
      // generating the new table.
      for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
           i != added_since_rebuild_.end(); ++i)
        AddFingerprint(*i);
      added_since_rebuild_.clear();

      // Now handle deletions.
      DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
      deleted_since_rebuild_.clear();

      // Send an update notification to all child processes.
      post_new_table_event_(shared_memory_);

      WriteFullTable();
    }
  }
  table_builder_ = NULL;  // Will release our reference to the builder.

  // Notify the unit test that the rebuild is complete (will be NULL in prod.)
  if (rebuild_complete_task_.get()) {
    rebuild_complete_task_->Run();
    rebuild_complete_task_.reset(NULL);
  }
}

void VisitedLinkMaster::WriteToFile(FILE* file,
                                    off_t offset,
                                    void* data,
                                    int32 data_size) {
#ifndef NDEBUG
  posted_asynchronous_operation_ = true;
#endif

  if (file_thread_) {
    // Send the write to the other thread for execution to avoid blocking.
    AsyncWriter* writer = new AsyncWriter(file, offset, data, data_size);
    file_thread_->PostTask(FROM_HERE, writer);
  } else {
    // When there is no I/O thread, we are probably running in unit test mode,
    // just do the write synchronously.
    AsyncWriter::WriteToFile(file, offset, data, data_size);
  }
}

void VisitedLinkMaster::WriteUsedItemCountToFile() {
  if (!file_)
    return;  // See comment on the file_ variable for why this might happen.
  WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
}

void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
  if (!file_)
    return;  // See comment on the file_ variable for why this might happen.
  if (last_hash < first_hash) {
    // Handle wraparound at 0. This first write is first_hash->EOF
    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
                &hash_table_[first_hash],
                (table_length_ - first_hash + 1) * sizeof(Fingerprint));

    // Now do 0->last_lash.
    WriteToFile(file_, kFileHeaderSize, hash_table_,
                (last_hash + 1) * sizeof(Fingerprint));
  } else {
    // Normal case, just write the range.
    WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
                &hash_table_[first_hash],
                (last_hash - first_hash + 1) * sizeof(Fingerprint));
  }
}

bool VisitedLinkMaster::ReadFromFile(FILE* file,
                                     off_t offset,
                                     void* data,
                                     size_t data_size) {
#ifndef NDEBUG
  // Since this function is synchronous, we require that no asynchronous
  // operations could possibly be pending.
  DCHECK(!posted_asynchronous_operation_);
#endif

  fseek(file, offset, SEEK_SET);

  size_t num_read = fread(data, 1, data_size, file);
  return num_read == data_size;
}

// VisitedLinkTableBuilder ----------------------------------------------------

VisitedLinkMaster::TableBuilder::TableBuilder(
    VisitedLinkMaster* master,
    const uint8 salt[LINK_SALT_LENGTH])
    : master_(master),
      main_message_loop_(MessageLoop::current()),
      success_(true) {
  fingerprints_.reserve(4096);
  memcpy(salt_, salt, sizeof(salt));
}

// TODO(brettw): Do we want to try to cancel the request if this happens? It
// could delay shutdown if there are a lot of URLs.
void VisitedLinkMaster::TableBuilder::DisownMaster() {
  master_ = NULL;
}

void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
  if (!url.is_empty()) {
    fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
        url.spec().data(), url.spec().length(), salt_));
  }
}

void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
  success_ = success;
  DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";

  // Marshal to the main thread to notify the VisitedLinkMaster that the
  // rebuild is complete.
  main_message_loop_->PostTask(FROM_HERE, NewRunnableMethod(this,
      &TableBuilder::OnCompleteMainThread));
}

void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
  if (master_)
    master_->OnTableRebuildComplete(success_, fingerprints_);

  // WILL (generally) DELETE THIS! This balances the AddRef in
  // VisitedLinkMaster::RebuildTableFromHistory.
  Release();
}