// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/importer/firefox2_importer.h" #include #include #include "app/l10n_util.h" #include "base/file_path.h" #include "base/file_util.h" #include "base/i18n/icu_string_conversions.h" #include "base/message_loop.h" #include "base/path_service.h" #include "base/stl_util-inl.h" #include "base/utf_string_conversions.h" #include "base/values.h" #include "chrome/browser/history/history_types.h" #include "chrome/browser/importer/firefox_importer_utils.h" #include "chrome/browser/importer/importer_bridge.h" #include "chrome/browser/importer/mork_reader.h" #include "chrome/browser/importer/nss_decryptor.h" #include "chrome/browser/search_engines/template_url.h" #include "chrome/browser/search_engines/template_url_parser.h" #include "chrome/common/time_format.h" #include "chrome/common/url_constants.h" #include "grit/generated_resources.h" #include "net/base/data_url.h" #include "webkit/glue/password_form.h" using base::Time; using importer::BOOKMARKS_HTML; using importer::FAVORITES; using importer::HISTORY; using importer::HOME_PAGE; using importer::PASSWORDS; using importer::ProfileInfo; using importer::SEARCH_ENGINES; using webkit_glue::PasswordForm; // Firefox2Importer. Firefox2Importer::Firefox2Importer() : parsing_bookmarks_html_file_(false) { } Firefox2Importer::~Firefox2Importer() { } void Firefox2Importer::StartImport(ProfileInfo profile_info, uint16 items, ImporterBridge* bridge) { bridge_ = bridge; source_path_ = FilePath::FromWStringHack(profile_info.source_path); app_path_ = FilePath::FromWStringHack(profile_info.app_path); parsing_bookmarks_html_file_ = (profile_info.browser_type == BOOKMARKS_HTML); // The order here is important! bridge_->NotifyStarted(); if ((items & HOME_PAGE) && !cancelled()) ImportHomepage(); // Doesn't have a UI item. // Note history should be imported before bookmarks because bookmark import // will also import favicons and we store favicon for a URL only if the URL // exist in history or bookmarks. if ((items & HISTORY) && !cancelled()) { bridge_->NotifyItemStarted(HISTORY); ImportHistory(); bridge_->NotifyItemEnded(HISTORY); } if ((items & FAVORITES) && !cancelled()) { bridge_->NotifyItemStarted(FAVORITES); ImportBookmarks(); bridge_->NotifyItemEnded(FAVORITES); } if ((items & SEARCH_ENGINES) && !cancelled()) { bridge_->NotifyItemStarted(SEARCH_ENGINES); ImportSearchEngines(); bridge_->NotifyItemEnded(SEARCH_ENGINES); } if ((items & PASSWORDS) && !cancelled()) { bridge_->NotifyItemStarted(PASSWORDS); ImportPasswords(); bridge_->NotifyItemEnded(PASSWORDS); } bridge_->NotifyEnded(); } // static void Firefox2Importer::LoadDefaultBookmarks(const FilePath& app_path, std::set *urls) { FilePath file = app_path.AppendASCII("defaults") .AppendASCII("profile") .AppendASCII("bookmarks.html"); urls->clear(); // Read the whole file. std::string content; file_util::ReadFileToString(file, &content); std::vector lines; SplitString(content, '\n', &lines); std::string charset; for (size_t i = 0; i < lines.size(); ++i) { std::string line; TrimString(lines[i], " ", &line); // Get the encoding of the bookmark file. if (ParseCharsetFromLine(line, &charset)) continue; // Get the bookmark. std::wstring title; GURL url, favicon; std::wstring shortcut; Time add_date; std::wstring post_data; if (ParseBookmarkFromLine(line, charset, &title, &url, &favicon, &shortcut, &add_date, &post_data)) urls->insert(url); } } // static TemplateURL* Firefox2Importer::CreateTemplateURL(const std::wstring& title, const std::wstring& keyword, const GURL& url) { // Skip if the keyword or url is invalid. if (keyword.empty() && url.is_valid()) return NULL; TemplateURL* t_url = new TemplateURL(); // We set short name by using the title if it exists. // Otherwise, we use the shortcut. t_url->set_short_name(!title.empty() ? title : keyword); t_url->set_keyword(keyword); t_url->SetURL(TemplateURLRef::DisplayURLToURLRef(UTF8ToWide(url.spec())), 0, 0); return t_url; } // static void Firefox2Importer::ImportBookmarksFile( const FilePath& file_path, const std::set& default_urls, bool import_to_bookmark_bar, const std::wstring& first_folder_name, Importer* importer, std::vector* bookmarks, std::vector* template_urls, std::vector* favicons) { std::string content; file_util::ReadFileToString(file_path, &content); std::vector lines; SplitString(content, '\n', &lines); std::vector toolbar_bookmarks; std::wstring last_folder = first_folder_name; bool last_folder_on_toolbar = false; std::vector path; size_t toolbar_folder = 0; std::string charset; for (size_t i = 0; i < lines.size() && (!importer || !importer->cancelled()); ++i) { std::string line; TrimString(lines[i], " ", &line); // Get the encoding of the bookmark file. if (ParseCharsetFromLine(line, &charset)) continue; // Get the folder name. if (ParseFolderNameFromLine(line, charset, &last_folder, &last_folder_on_toolbar)) continue; // Get the bookmark entry. std::wstring title, shortcut; GURL url, favicon; Time add_date; std::wstring post_data; // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based // keywords yet. if (ParseBookmarkFromLine(line, charset, &title, &url, &favicon, &shortcut, &add_date, &post_data) && post_data.empty() && CanImportURL(GURL(url)) && default_urls.find(url) == default_urls.end()) { if (toolbar_folder > path.size() && path.size() > 0) { NOTREACHED(); // error in parsing. break; } ProfileWriter::BookmarkEntry entry; entry.creation_time = add_date; entry.url = url; entry.title = title; if (import_to_bookmark_bar && toolbar_folder) { // Flatten the items in toolbar. entry.in_toolbar = true; entry.path.assign(path.begin() + toolbar_folder, path.end()); toolbar_bookmarks.push_back(entry); } else { // Insert the item into the "Imported from Firefox" folder. entry.path.assign(path.begin(), path.end()); if (import_to_bookmark_bar) entry.path.erase(entry.path.begin()); bookmarks->push_back(entry); } // Save the favicon. DataURLToFaviconUsage will handle the case where // there is no favicon. if (favicons) DataURLToFaviconUsage(url, favicon, favicons); if (template_urls) { // If there is a SHORTCUT attribute for this bookmark, we // add it as our keywords. TemplateURL* t_url = CreateTemplateURL(title, shortcut, url); if (t_url) template_urls->push_back(t_url); } continue; } // Bookmarks in sub-folder are encapsulated with
tag. if (StartsWithASCII(line, "
", true)) { path.push_back(last_folder); last_folder.clear(); if (last_folder_on_toolbar && !toolbar_folder) toolbar_folder = path.size(); } else if (StartsWithASCII(line, "
", true)) { if (path.empty()) break; // Mismatch
. path.pop_back(); if (toolbar_folder > path.size()) toolbar_folder = 0; } } bookmarks->insert(bookmarks->begin(), toolbar_bookmarks.begin(), toolbar_bookmarks.end()); } void Firefox2Importer::ImportBookmarks() { // Load the default bookmarks. std::set default_urls; if (!parsing_bookmarks_html_file_) LoadDefaultBookmarks(app_path_, &default_urls); // Parse the bookmarks.html file. std::vector bookmarks, toolbar_bookmarks; std::vector template_urls; std::vector favicons; FilePath file = source_path_; if (!parsing_bookmarks_html_file_) file = file.AppendASCII("bookmarks.html"); std::wstring first_folder_name; if (parsing_bookmarks_html_file_) first_folder_name = l10n_util::GetString(IDS_BOOKMARK_GROUP); else first_folder_name = l10n_util::GetString(IDS_BOOKMARK_GROUP_FROM_FIREFOX); ImportBookmarksFile(file, default_urls, import_to_bookmark_bar(), first_folder_name, this, &bookmarks, &template_urls, &favicons); // Write data into profile. if (!bookmarks.empty() && !cancelled()) { int options = 0; if (import_to_bookmark_bar()) options = ProfileWriter::IMPORT_TO_BOOKMARK_BAR; bridge_->AddBookmarkEntries(bookmarks, first_folder_name, options); } if (!parsing_bookmarks_html_file_ && !template_urls.empty() && !cancelled()) { bridge_->SetKeywords(template_urls, -1, false); } else { STLDeleteContainerPointers(template_urls.begin(), template_urls.end()); } if (!favicons.empty()) { bridge_->SetFavIcons(favicons); } } void Firefox2Importer::ImportPasswords() { // Initializes NSS3. NSSDecryptor decryptor; if (!decryptor.Init(source_path_.ToWStringHack(), source_path_.ToWStringHack()) && !decryptor.Init(app_path_.ToWStringHack(), source_path_.ToWStringHack())) { return; } // Firefox 2 uses signons2.txt to store the pssswords. If it doesn't // exist, we try to find its older version. FilePath file = source_path_.AppendASCII("signons2.txt"); if (!file_util::PathExists(file)) { file = source_path_.AppendASCII("signons.txt"); } std::string content; file_util::ReadFileToString(file, &content); std::vector forms; decryptor.ParseSignons(content, &forms); if (!cancelled()) { for (size_t i = 0; i < forms.size(); ++i) { bridge_->SetPasswordForm(forms[i]); } } } void Firefox2Importer::ImportHistory() { FilePath file = source_path_.AppendASCII("history.dat"); ImportHistoryFromFirefox2(file, bridge_); } void Firefox2Importer::ImportSearchEngines() { std::vector files; GetSearchEnginesXMLFiles(&files); std::vector search_engines; ParseSearchEnginesFromXMLFiles(files, &search_engines); int default_index = GetFirefoxDefaultSearchEngineIndex(search_engines, source_path_); bridge_->SetKeywords(search_engines, default_index, true); } void Firefox2Importer::ImportHomepage() { GURL home_page = GetHomepage(source_path_); if (home_page.is_valid() && !IsDefaultHomepage(home_page, app_path_)) { bridge_->AddHomePage(home_page); } } void Firefox2Importer::GetSearchEnginesXMLFiles( std::vector* files) { // Search engines are contained in XML files in a searchplugins directory that // can be found in 2 locations: // - Firefox install dir (default search engines) // - the profile dir (user added search engines) FilePath dir = app_path_.AppendASCII("searchplugins"); FindXMLFilesInDir(dir, files); FilePath profile_dir = source_path_.AppendASCII("searchplugins"); FindXMLFilesInDir(profile_dir, files); } // static bool Firefox2Importer::ParseCharsetFromLine(const std::string& line, std::string* charset) { const char kCharset[] = "charset="; if (StartsWithASCII(line, "', end) + 1; // If no end tag or start tag is broken, we skip to find the folder name. if (end == std::string::npos || tag_end < arraysize(kFolderOpen)) return false; base::CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(), base::OnStringConversionError::SKIP, folder_name); HTMLUnescape(folder_name); std::string attribute_list = line.substr(arraysize(kFolderOpen), tag_end - arraysize(kFolderOpen) - 1); std::string value; if (GetAttribute(attribute_list, kToolbarFolderAttribute, &value) && LowerCaseEqualsASCII(value, "true")) *is_toolbar_folder = true; else *is_toolbar_folder = false; return true; } // static bool Firefox2Importer::ParseBookmarkFromLine(const std::string& line, const std::string& charset, std::wstring* title, GURL* url, GURL* favicon, std::wstring* shortcut, Time* add_date, std::wstring* post_data) { const char kItemOpen[] = "
clear(); *url = GURL(); *favicon = GURL(); shortcut->clear(); post_data->clear(); *add_date = Time(); if (!StartsWithASCII(line, kItemOpen, true)) return false; size_t end = line.find(kItemClose); size_t tag_end = line.rfind('>', end) + 1; if (end == std::string::npos || tag_end < arraysize(kItemOpen)) return false; // No end tag or start tag is broken. std::string attribute_list = line.substr(arraysize(kItemOpen), tag_end - arraysize(kItemOpen) - 1); // We don't import Live Bookmark folders, which is Firefox's RSS reading // feature, since the user never necessarily bookmarked them and we don't // have this feature to update their contents. std::string value; if (GetAttribute(attribute_list, kFeedURLAttribute, &value)) return false; // Title base::CodepageToWide(line.substr(tag_end, end - tag_end), charset.c_str(), base::OnStringConversionError::SKIP, title); HTMLUnescape(title); // URL if (GetAttribute(attribute_list, kHrefAttribute, &value)) { std::wstring w_url; base::CodepageToWide(value, charset.c_str(), base::OnStringConversionError::SKIP, &w_url); HTMLUnescape(&w_url); string16 url16 = WideToUTF16Hack(w_url); *url = GURL(url16); } // Favicon if (GetAttribute(attribute_list, kIconAttribute, &value)) *favicon = GURL(value); // Keyword if (GetAttribute(attribute_list, kShortcutURLAttribute, &value)) { base::CodepageToWide(value, charset.c_str(), base::OnStringConversionError::SKIP, shortcut); HTMLUnescape(shortcut); } // Add date if (GetAttribute(attribute_list, kAddDateAttribute, &value)) { int64 time = StringToInt64(value); // Upper bound it at 32 bits. if (0 < time && time < (1LL << 32)) *add_date = Time::FromTimeT(time); } // Post data. if (GetAttribute(attribute_list, kPostDataAttribute, &value)) { base::CodepageToWide(value, charset.c_str(), base::OnStringConversionError::SKIP, post_data); HTMLUnescape(post_data); } return true; } // static bool Firefox2Importer::GetAttribute(const std::string& attribute_list, const std::string& attribute, std::string* value) { const char kQuote[] = "\""; size_t begin = attribute_list.find(attribute + "=" + kQuote); if (begin == std::string::npos) return false; // Can't find the attribute. begin = attribute_list.find(kQuote, begin) + 1; size_t end = begin + 1; while (end < attribute_list.size()) { if (attribute_list[end] == '"' && attribute_list[end - 1] != '\\') { break; } end++; } if (end == attribute_list.size()) return false; // The value is not quoted. *value = attribute_list.substr(begin, end - begin); return true; } // static void Firefox2Importer::HTMLUnescape(std::wstring *text) { string16 text16 = WideToUTF16Hack(*text); ReplaceSubstringsAfterOffset( &text16, 0, ASCIIToUTF16("<"), ASCIIToUTF16("<")); ReplaceSubstringsAfterOffset( &text16, 0, ASCIIToUTF16(">"), ASCIIToUTF16(">")); ReplaceSubstringsAfterOffset( &text16, 0, ASCIIToUTF16("&"), ASCIIToUTF16("&")); ReplaceSubstringsAfterOffset( &text16, 0, ASCIIToUTF16("""), ASCIIToUTF16("\"")); ReplaceSubstringsAfterOffset( &text16, 0, ASCIIToUTF16("'"), ASCIIToUTF16("\'")); text->assign(UTF16ToWideHack(text16)); } // static void Firefox2Importer::FindXMLFilesInDir( const FilePath& dir, std::vector* xml_files) { file_util::FileEnumerator file_enum(dir, false, file_util::FileEnumerator::FILES, FILE_PATH_LITERAL("*.xml")); FilePath file(file_enum.Next()); while (!file.empty()) { xml_files->push_back(file); file = file_enum.Next(); } } // static void Firefox2Importer::DataURLToFaviconUsage( const GURL& link_url, const GURL& favicon_data, std::vector* favicons) { if (!link_url.is_valid() || !favicon_data.is_valid() || !favicon_data.SchemeIs(chrome::kDataScheme)) return; // Parse the data URL. std::string mime_type, char_set, data; if (!net::DataURL::Parse(favicon_data, &mime_type, &char_set, &data) || data.empty()) return; history::ImportedFavIconUsage usage; if (!ReencodeFavicon(reinterpret_cast(&data[0]), data.size(), &usage.png_data)) return; // Unable to decode. // We need to make up a URL for the favicon. We use a version of the page's // URL so that we can be sure it will not collide. usage.favicon_url = GURL(std::string("made-up-favicon:") + link_url.spec()); // We only have one URL per favicon for Firefox 2 bookmarks. usage.urls.insert(link_url); favicons->push_back(usage); }