expire_history_backend.cc

// Copyright (c) 2010 The Chromium Authors. All rights reserved.// Use of this source code is governed by a BSD-style license that can be// found in the LICENSE file.#include "chrome/browser/history/expire_history_backend.h"#include <algorithm>#include <limits>#include "base/compiler_specific.h"#include "base/file_util.h"#include "base/message_loop.h"#include "chrome/browser/bookmarks/bookmark_service.h"#include "chrome/browser/history/archived_database.h"#include "chrome/browser/history/history_database.h"#include "chrome/browser/history/history_notifications.h"#include "chrome/browser/history/text_database_manager.h"#include "chrome/browser/history/thumbnail_database.h"#include "chrome/common/notification_type.h"using base::Time;
using base::TimeDelta;
namespace history {
namespace {
// The number of days by which the expiration threshold is advanced for items// that we want to expire early, such as those of AUTO_SUBFRAME transition type.constint kEarlyExpirationAdvanceDays = 30;
// Reads all types of visits starting from beginning of time to the given end// time. This is the most general reader.class AllVisitsReader : public ExpiringVisitsReader {
public:
virtualbool Read(Time end_time, HistoryDatabase* db,
VisitVector* visits, int max_visits) const {
DCHECK(db) << "must have a database to operate upon";
DCHECK(visits) << "visit vector has to exist in order to populate it";
db->GetAllVisitsInRange(Time(), end_time, max_visits, visits);
// When we got the maximum number of visits we asked for, we say there could// be additional things to expire now.returnstatic_cast<int>(visits->size()) == max_visits;
}
};
// Reads only AUTO_SUBFRAME visits, within a computed range. The range is// computed as follows:// * |begin_time| is read from the meta table. This value is updated whenever// there are no more additional visits to expire by this reader.// * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),// but not past the current time.class AutoSubframeVisitsReader : public ExpiringVisitsReader {
public:
virtualbool Read(Time end_time, HistoryDatabase* db,
VisitVector* visits, int max_visits) const {
DCHECK(db) << "must have a database to operate upon";
DCHECK(visits) << "visit vector has to exist in order to populate it";
Time begin_time = db->GetEarlyExpirationThreshold();
// Advance |end_time| to expire early.
Time early_end_time = end_time +
TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
// We don't want to set the early expiration threshold to a time in the// future.
Time now = Time::Now();
if (early_end_time > now)
early_end_time = now;
db->GetVisitsInRangeForTransition(begin_time, early_end_time,
max_visits,
PageTransition::AUTO_SUBFRAME,
visits);
bool more = static_cast<int>(visits->size()) == max_visits;
if (!more)
db->UpdateEarlyExpirationThreshold(early_end_time);
return more;
}
};
// Returns true if this visit is worth archiving. Otherwise, this visit is not// worth saving (for example, subframe navigations and redirects) and we can// just delete it when it gets old.bool ShouldArchiveVisit(const VisitRow& visit) {
int no_qualifier = PageTransition::StripQualifier(visit.transition);
// These types of transitions are always "important" and the user will want// to see them.if (no_qualifier == PageTransition::TYPED ||
no_qualifier == PageTransition::AUTO_BOOKMARK ||
no_qualifier == PageTransition::START_PAGE)
returntrue;
// Only archive these "less important" transitions when they were the final// navigation and not part of a redirect chain.if ((no_qualifier == PageTransition::LINK ||
no_qualifier == PageTransition::FORM_SUBMIT ||
no_qualifier == PageTransition::KEYWORD ||
no_qualifier == PageTransition::GENERATED) &&
visit.transition & PageTransition::CHAIN_END)
returntrue;
// The transition types we ignore are AUTO_SUBFRAME and MANUAL_SUBFRAME.returnfalse;
}
// The number of visits we will expire very time we check for old items. This// Prevents us from doing too much work any given time.constint kNumExpirePerIteration = 10;
// The number of seconds between checking for items that should be expired when// we think there might be more items to expire. This timeout is used when the// last expiration found at least kNumExpirePerIteration and we want to check// again "soon."constint kExpirationDelaySec = 30;
// The number of minutes between checking, as with kExpirationDelaySec, but// when we didn't find enough things to expire last time. If there was no// history to expire last iteration, it's likely there is nothing next// iteration, so we want to wait longer before checking to avoid wasting CPU.constint kExpirationEmptyDelayMin = 5;
} // namespacestruct ExpireHistoryBackend::DeleteDependencies {
// The time range affected. These can be is_null() to be unbounded in one// or both directions.
base::Time begin_time, end_time;
// ----- Filled by DeleteVisitRelatedInfo or manually if a function doesn't// call that function. -----// The unique URL rows affected by this delete.
std::map<URLID, URLRow> affected_urls;
// ----- Filled by DeleteOneURL -----// The URLs deleted during this operation.
std::vector<URLRow> deleted_urls;
// The list of all favicon IDs that the affected URLs had. Favicons will be// shared between all URLs with the same favicon, so this is the set of IDs// that we will need to check when the delete operations are complete.
std::set<FavIconID> affected_favicons;
// Tracks the set of databases that have changed so we can optimize when// when we're done.
TextDatabaseManager::ChangeSet text_db_changes;
};
ExpireHistoryBackend::ExpireHistoryBackend(
BroadcastNotificationDelegate* delegate,
BookmarkService* bookmark_service)
: delegate_(delegate),
main_db_(NULL),
archived_db_(NULL),
thumb_db_(NULL),
text_db_(NULL),
ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)),
bookmark_service_(bookmark_service) {
}
ExpireHistoryBackend::~ExpireHistoryBackend() {
}
void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
ArchivedDatabase* archived_db,
ThumbnailDatabase* thumb_db,
TextDatabaseManager* text_db) {
main_db_ = main_db;
archived_db_ = archived_db;
thumb_db_ = thumb_db;
text_db_ = text_db;
}
void ExpireHistoryBackend::DeleteURL(const GURL& url) {
if (!main_db_)
return;
URLRow url_row;
if (!main_db_->GetRowForURL(url, &url_row))
return; // Nothing to delete.// Collect all the visits and delete them. Note that we don't give up if// there are no visits, since the URL could still have an entry that we should// delete.// TODO(brettw): bug 1171148: We should also delete from the archived DB.
VisitVector visits;
main_db_->GetVisitsForURL(url_row.id(), &visits);
DeleteDependencies dependencies;
DeleteVisitRelatedInfo(visits, &dependencies);
// We skip ExpireURLsForVisits (since we are deleting from the URL, and not// starting with visits in a given time range). We therefore need to call the// deletion and favicon update functions manually.
BookmarkService* bookmark_service = GetBookmarkService();
bool is_bookmarked =
(bookmark_service && bookmark_service->IsBookmarked(url));
DeleteOneURL(url_row, is_bookmarked, &dependencies);
if (!is_bookmarked)
DeleteFaviconsIfPossible(dependencies.affected_favicons);
if (text_db_)
text_db_->OptimizeChangedDatabases(dependencies.text_db_changes);
BroadcastDeleteNotifications(&dependencies);
}
void ExpireHistoryBackend::ExpireHistoryBetween(
const std::set<GURL>& restrict_urls, Time begin_time, Time end_time) {
if (!main_db_)
return;
// There may be stuff in the text database manager's temporary cache.if (text_db_)
text_db_->DeleteFromUncommitted(restrict_urls, begin_time, end_time);
// Find the affected visits and delete them.// TODO(brettw): bug 1171164: We should query the archived database here, too.
VisitVector visits;
main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
if (!restrict_urls.empty()) {
std::set<URLID> url_ids;
for (std::set<GURL>::const_iterator url = restrict_urls.begin();
url != restrict_urls.end(); ++url)
url_ids.insert(main_db_->GetRowForURL(*url, NULL));
VisitVector all_visits;
all_visits.swap(visits);
for (VisitVector::iterator visit = all_visits.begin();
visit != all_visits.end(); ++visit) {
if (url_ids.find(visit->url_id) != url_ids.end())
visits.push_back(*visit);
}
}
if (visits.empty())
return;
DeleteDependencies dependencies;
DeleteVisitRelatedInfo(visits, &dependencies);
// Delete or update the URLs affected. We want to update the visit counts// since this is called by the user who wants to delete their recent history,// and we don't want to leave any evidence.
ExpireURLsForVisits(visits, &dependencies);
DeleteFaviconsIfPossible(dependencies.affected_favicons);
// An is_null begin time means that all history should be deleted.
BroadcastDeleteNotifications(&dependencies);
// Pick up any bits possibly left over.
ParanoidExpireHistory();
}
void ExpireHistoryBackend::ArchiveHistoryBefore(Time end_time) {
if (!main_db_)
return;
// Archive as much history as possible before the given date.
ArchiveSomeOldHistory(end_time, GetAllVisitsReader(),
std::numeric_limits<size_t>::max());
ParanoidExpireHistory();
}
void ExpireHistoryBackend::InitWorkQueue() {
DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
for (size_t i = 0; i < readers_.size(); i++)
work_queue_.push(readers_[i]);
}
const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
if (!all_visits_reader_.get())
all_visits_reader_.reset(new AllVisitsReader());
return all_visits_reader_.get();
}
const ExpiringVisitsReader*
ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
if (!auto_subframe_visits_reader_.get())
auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
return auto_subframe_visits_reader_.get();
}
void ExpireHistoryBackend::StartArchivingOldStuff(
TimeDelta expiration_threshold) {
expiration_threshold_ = expiration_threshold;
// Remove all readers, just in case this was method was called before.
readers_.clear();
// For now, we explicitly add all known readers. If we come up with more// reader types (in case we want to expire different types of visits in// different ways), we can make it be populated by creator/owner of// ExpireHistoryBackend.
readers_.push_back(GetAllVisitsReader());
readers_.push_back(GetAutoSubframeVisitsReader());
// Initialize the queue with all tasks for the first set of iterations.
InitWorkQueue();
ScheduleArchive();
}
void ExpireHistoryBackend::DeleteFaviconsIfPossible(
const std::set<FavIconID>& favicon_set) {
if (!main_db_ || !thumb_db_)
return;
for (std::set<FavIconID>::const_iterator i = favicon_set.begin();
i != favicon_set.end(); ++i) {
if (!main_db_->IsFavIconUsed(*i))
thumb_db_->DeleteFavIcon(*i);
}
}
void ExpireHistoryBackend::BroadcastDeleteNotifications(
DeleteDependencies* dependencies) {
if (!dependencies->deleted_urls.empty()) {
// Broadcast the URL deleted notification. Note that we also broadcast when// we were requested to delete everything even if that was a NOP, since// some components care to know when history is deleted (it's up to them to// determine if they care whether anything was deleted).
URLsDeletedDetails* deleted_details = new URLsDeletedDetails;
deleted_details->all_history = false;
std::vector<URLRow> typed_urls_changed; // Collect this for later.for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) {
deleted_details->urls.insert(dependencies->deleted_urls[i].url());
if (dependencies->deleted_urls[i].typed_count() > 0)
typed_urls_changed.push_back(dependencies->deleted_urls[i]);
}
delegate_->BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED,
deleted_details);
// Broadcast the typed URL changed modification (this updates the inline// autocomplete database).//// Note: if we ever need to broadcast changes to more than just typed URLs,// this notification should be changed rather than a new "non-typed"// notification added. The in-memory database can always do the filtering// itself in that case.if (!typed_urls_changed.empty()) {
URLsModifiedDetails* modified_details = new URLsModifiedDetails;
modified_details->changed_urls.swap(typed_urls_changed);
delegate_->BroadcastNotifications(
NotificationType::HISTORY_TYPED_URLS_MODIFIED,
modified_details);
}
}
}
void ExpireHistoryBackend::DeleteVisitRelatedInfo(
const VisitVector& visits,
DeleteDependencies* dependencies) {
for (size_t i = 0; i < visits.size(); i++) {
// Delete the visit itself.
main_db_->DeleteVisit(visits[i]);
// Add the URL row to the affected URL list.
std::map<URLID, URLRow>::const_iterator found =
dependencies->affected_urls.find(visits[i].url_id);
const URLRow* cur_row = NULL;
if (found == dependencies->affected_urls.end()) {
URLRow row;
if (!main_db_->GetURLRow(visits[i].url_id, &row))
continue;
dependencies->affected_urls[visits[i].url_id] = row;
cur_row = &dependencies->affected_urls[visits[i].url_id];
} else {
cur_row = &found->second;
}
// Delete any associated full-text indexed data.if (visits[i].is_indexed && text_db_) {
text_db_->DeletePageData(visits[i].visit_time, cur_row->url(),
&dependencies->text_db_changes);
}
}
}
void ExpireHistoryBackend::DeleteOneURL(
const URLRow& url_row,
bool is_bookmarked,
DeleteDependencies* dependencies) {
main_db_->DeleteSegmentForURL(url_row.id());
// The URL may be in the text database manager's temporary cache.if (text_db_) {
std::set<GURL> restrict_urls;
restrict_urls.insert(url_row.url());
text_db_->DeleteFromUncommitted(restrict_urls, base::Time(), base::Time());
}
if (!is_bookmarked) {
dependencies->deleted_urls.push_back(url_row);
// Delete stuff that references this URL.if (thumb_db_)
thumb_db_->DeleteThumbnail(url_row.id());
// Collect shared information.if (url_row.favicon_id())
dependencies->affected_favicons.insert(url_row.favicon_id());
// Last, delete the URL entry.
main_db_->DeleteURLRow(url_row.id());
}
}
URLID ExpireHistoryBackend::ArchiveOneURL(const URLRow& url_row) {
if (!archived_db_)
return 0;
// See if this URL is present in the archived database already. Note that// we must look up by ID since the URL ID will be different.
URLRow archived_row;
if (archived_db_->GetRowForURL(url_row.url(), &archived_row)) {
// TODO(sky): bug 1168470, need to archive past search terms.// FIXME(brettw) should be copy the visit counts over? This will mean that// the main DB's visit counts are only for the last 3 months rather than// accumulative.
archived_row.set_last_visit(url_row.last_visit());
archived_db_->UpdateURLRow(archived_row.id(), archived_row);
return archived_row.id();
}
// This row is not in the archived DB, add it.return archived_db_->AddURL(url_row);
}
namespace {
struct ChangedURL {
ChangedURL() : visit_count(0), typed_count(0) {}
int visit_count;
int typed_count;
};
} // namespacevoid ExpireHistoryBackend::ExpireURLsForVisits(
const VisitVector& visits,
DeleteDependencies* dependencies) {
// First find all unique URLs and the number of visits we're deleting for// each one.
std::map<URLID, ChangedURL> changed_urls;
for (size_t i = 0; i < visits.size(); i++) {
ChangedURL& cur = changed_urls[visits[i].url_id];
cur.visit_count++;
// NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().// TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as// typed, which would eliminate the need for this code.
PageTransition::Type transition =
PageTransition::StripQualifier(visits[i].transition);
if ((transition == PageTransition::TYPED &&
!PageTransition::IsRedirect(visits[i].transition)) ||
transition == PageTransition::KEYWORD_GENERATED)
cur.typed_count++;
}
// Check each unique URL with deleted visits.
BookmarkService* bookmark_service = GetBookmarkService();
for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
i != changed_urls.end(); ++i) {
// The unique URL rows should already be filled into the dependencies.
URLRow& url_row = dependencies->affected_urls[i->first];
if (!url_row.id())
continue; // URL row doesn't exist in the database.// Check if there are any other visits for this URL and update the time// (the time change may not actually be synced to disk below when we're// archiving).
VisitRow last_visit;
if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
url_row.set_last_visit(last_visit.visit_time);
else
url_row.set_last_visit(Time());
// Don't delete URLs with visits still in the DB, or bookmarked.bool is_bookmarked =
(bookmark_service && bookmark_service->IsBookmarked(url_row.url()));
if (!is_bookmarked && url_row.last_visit().is_null()) {
// Not bookmarked and no more visits. Nuke the url.
DeleteOneURL(url_row, is_bookmarked, dependencies);
} else {
// NOTE: The calls to std::max() below are a backstop, but they should// never actually be needed unless the database is corrupt (I think).
url_row.set_visit_count(
std::max(0, url_row.visit_count() - i->second.visit_count));
url_row.set_typed_count(
std::max(0, url_row.typed_count() - i->second.typed_count));
// Update the db with the new details.
main_db_->UpdateURLRow(url_row.id(), url_row);
}
}
}
void ExpireHistoryBackend::ArchiveURLsAndVisits(
const VisitVector& visits,
DeleteDependencies* dependencies) {
if (!archived_db_)
return;
// Make sure all unique URL rows are added to the dependency list and the// archived database. We will also keep the mapping between the main DB URLID// and the archived one.
std::map<URLID, URLID> main_id_to_archived_id;
for (size_t i = 0; i < visits.size(); i++) {
std::map<URLID, URLRow>::const_iterator found =
dependencies->affected_urls.find(visits[i].url_id);
if (found == dependencies->affected_urls.end()) {
// Unique URL encountered, archive it.
URLRow row; // Row in the main DB.
URLID archived_id; // ID in the archived DB.if (!main_db_->GetURLRow(visits[i].url_id, &row) ||
!(archived_id = ArchiveOneURL(row))) {
// Failure archiving, skip this one.continue;
}
// Only add URL to the dependency list once we know we successfully// archived it.
main_id_to_archived_id[row.id()] = archived_id;
dependencies->affected_urls[row.id()] = row;
}
}
// Now archive the visits since we know the URL ID to make them reference.// The source visit list should still reference the visits in the main DB, but// we will update it to reflect only the visits that were successfully// archived.for (size_t i = 0; i < visits.size(); i++) {
// Construct the visit that we will add to the archived database. We do// not store referring visits since we delete many of the visits when// archiving.
VisitRow cur_visit(visits[i]);
cur_visit.url_id = main_id_to_archived_id[cur_visit.url_id];
cur_visit.referring_visit = 0;
archived_db_->AddVisit(&cur_visit);
// Ignore failures, we will delete it from the main DB no matter what.
}
}
void ExpireHistoryBackend::ScheduleArchive() {
TimeDelta delay;
if (work_queue_.empty()) {
// If work queue is empty, reset the work queue to contain all tasks and// schedule next iteration after a longer delay.
InitWorkQueue();
delay = TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
} else {
delay = TimeDelta::FromSeconds(kExpirationDelaySec);
}
factory_.RevokeAll();
MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod(
&ExpireHistoryBackend::DoArchiveIteration), delay.InMilliseconds());
}
void ExpireHistoryBackend::DoArchiveIteration() {
DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
const ExpiringVisitsReader* reader = work_queue_.front();
bool more_to_expire = ArchiveSomeOldHistory(GetCurrentArchiveTime(), reader,
kNumExpirePerIteration);
work_queue_.pop();
// If there are more items to expire, add the reader back to the queue, thus// creating a new task for future iterations.if (more_to_expire)
work_queue_.push(reader);
ScheduleArchive();
}
bool ExpireHistoryBackend::ArchiveSomeOldHistory(
base::Time end_time,
const ExpiringVisitsReader* reader,
int max_visits) {
if (!main_db_)
returnfalse;
// Add an extra time unit to given end time, because// GetAllVisitsInRange, et al. queries' end value is non-inclusive.
Time effective_end_time =
Time::FromInternalValue(end_time.ToInternalValue() + 1);
VisitVector affected_visits;
bool more_to_expire = reader->Read(effective_end_time, main_db_,
&affected_visits, max_visits);
// Some visits we'll delete while others we'll archive.
VisitVector deleted_visits, archived_visits;
for (size_t i = 0; i < affected_visits.size(); i++) {
if (ShouldArchiveVisit(affected_visits[i]))
archived_visits.push_back(affected_visits[i]);
else
deleted_visits.push_back(affected_visits[i]);
}
// Do the actual archiving.
DeleteDependencies archived_dependencies;
ArchiveURLsAndVisits(archived_visits, &archived_dependencies);
DeleteVisitRelatedInfo(archived_visits, &archived_dependencies);
DeleteDependencies deleted_dependencies;
DeleteVisitRelatedInfo(deleted_visits, &deleted_dependencies);
// This will remove or archive all the affected URLs. Must do the deleting// cleanup before archiving so the delete dependencies structure references// only those URLs that were actually deleted instead of having some visits// archived and then the rest deleted.
ExpireURLsForVisits(deleted_visits, &deleted_dependencies);
ExpireURLsForVisits(archived_visits, &archived_dependencies);
// Create a union of all affected favicons (we don't store favicons for// archived URLs) and delete them.
std::set<FavIconID> affected_favicons(
archived_dependencies.affected_favicons);
for (std::set<FavIconID>::const_iterator i =
deleted_dependencies.affected_favicons.begin();
i != deleted_dependencies.affected_favicons.end(); ++i) {
affected_favicons.insert(*i);
}
DeleteFaviconsIfPossible(affected_favicons);
// Send notifications for the stuff that was deleted. These won't normally be// in history views since they were subframes, but they will be in the visited// link system, which needs to be updated now. This function is smart enough// to not do anything if nothing was deleted.
BroadcastDeleteNotifications(&deleted_dependencies);
return more_to_expire;
}
void ExpireHistoryBackend::ParanoidExpireHistory() {
// FIXME(brettw): Bug 1067331: write this to clean up any errors.
}
BookmarkService* ExpireHistoryBackend::GetBookmarkService() {
// We use the bookmark service to determine if a URL is bookmarked. The// bookmark service is loaded on a separate thread and may not be done by the// time we get here. We therefor block until the bookmarks have finished// loading.if (bookmark_service_)
bookmark_service_->BlockTillLoaded();
return bookmark_service_;
}
} // namespace history