summaryrefslogtreecommitdiffstats
path: root/chrome/browser/mork_reader.cc
diff options
context:
space:
mode:
authorbeng@google.com <beng@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-09-13 00:56:27 +0000
committerbeng@google.com <beng@google.com@0039d316-1c4b-4281-b951-d872f2087c98>2008-09-13 00:56:27 +0000
commit6c9851a40d3f6280dc322c2392d9cfcf8fba1b2d (patch)
tree91072da4d7f80596bcc437e82685cf7de7944dfe /chrome/browser/mork_reader.cc
parent231d5a36e476d013a91ca742bb8a0a2973cfee54 (diff)
downloadchromium_src-6c9851a40d3f6280dc322c2392d9cfcf8fba1b2d.zip
chromium_src-6c9851a40d3f6280dc322c2392d9cfcf8fba1b2d.tar.gz
chromium_src-6c9851a40d3f6280dc322c2392d9cfcf8fba1b2d.tar.bz2
Move importer files into an importer subdirectory.
Also delete title chomper no one uses it. B=2205 Review URL: http://codereview.chromium.org/3035 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@2154 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/mork_reader.cc')
-rw-r--r--chrome/browser/mork_reader.cc581
1 files changed, 0 insertions, 581 deletions
diff --git a/chrome/browser/mork_reader.cc b/chrome/browser/mork_reader.cc
deleted file mode 100644
index 9a59cf0..0000000
--- a/chrome/browser/mork_reader.cc
+++ /dev/null
@@ -1,581 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is the Mork Reader.
- *
- * The Initial Developer of the Original Code is
- * Google Inc.
- * Portions created by the Initial Developer are Copyright (C) 2006
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- * Brian Ryner <bryner@brianryner.com> (original author)
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-// Source:
-// http://mxr.mozilla.org/firefox/source/db/morkreader/nsMorkReader.cpp
-// This file has been converted to google style.
-
-#include "chrome/browser/mork_reader.h"
-
-#include <algorithm>
-
-#include "base/logging.h"
-#include "base/string_util.h"
-#include "chrome/browser/firefox_importer_utils.h"
-#include "chrome/browser/history/history_types.h"
-
-namespace {
-
// Converts a single hex digit to its numeric value.
// Only '0'-'9' and upper-case 'A'-'F' are recognized; every other character
// (including lower-case 'a'-'f') yields -1.
inline int HexCharToInt(char c) {
  if ('0' <= c && c <= '9')
    return c - '0';
  if ('A' <= c && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

// Unescapes a Mork value and returns the result.
//
// Mork uses $XX escaping (two upper-case hex digits) to encode non-ASCII
// bytes. Additionally, '$' and '\' are backslash-escaped.
//
// Malformed input is handled leniently:
//  - a trailing lone backslash is dropped;
//  - a '$' that is not followed by at least two more characters is dropped
//    and the remaining characters are copied as regular text;
//  - a '$' followed by two characters that are not both valid hex digits
//    consumes all three characters and emits nothing.
std::string MorkUnescape(const std::string& input) {
  // We optimize for speed over space here -- size the result buffer to
  // the size of the source, which is an upper bound on the size of the
  // unescaped string.
  const size_t input_length = input.size();
  std::string result;
  result.reserve(input_length);

  for (size_t i = 0; i < input_length; ++i) {
    const char c = input[i];
    if (c == '\\') {
      // Escaped literal: skip the backslash, append the next character.
      ++i;
      if (i < input_length)
        result.push_back(input[i]);
    } else if (c == '$') {
      // Dollar sign denotes a hex-encoded byte. The check is written as
      // |i + 2 < input_length| because |input_length - 2| wraps around for
      // inputs shorter than two bytes (size_t is unsigned), which would make
      // a lone "$" read past the end of the string.
      if (i + 2 < input_length) {
        const int first = HexCharToInt(input[++i]);
        const int second = HexCharToInt(input[++i]);
        if (first >= 0 && second >= 0)
          result.push_back(static_cast<char>((first << 4) | second));
      }
    } else {
      // Regular character, just append.
      result.push_back(c);
    }
  }
  return result;
}
-
-} // namespace
-
-MorkReader::MorkReader() {
-}
-
-MorkReader::~MorkReader() {
- // Need to delete all the pointers to vectors we have in the table.
- for (RowMap::iterator i = table_.begin(); i != table_.end(); ++i)
- delete i->second;
-}
-
-bool MorkReader::Read(const std::wstring& filename) {
- stream_.open(filename.c_str());
- if (!stream_.is_open())
- return false;
-
- std::string line;
- if (!ReadLine(&line) ||
- line.compare("// <!-- <mdb:mork:z v=\"1.4\"/> -->") != 0)
- return false; // Unexpected file format.
-
- IndexMap column_map;
- while (ReadLine(&line)) {
- // Trim off leading spaces
- size_t idx = 0;
- size_t len = line.size();
- while (idx < len && line[idx] == ' ')
- ++idx;
- if (idx >= len)
- continue;
-
- // Look at the line to figure out what section type this is
- if (StartsWithASCII(&line[idx], "< <(a=c)>", true)) {
- // Column map. We begin by creating a hash of column id to column name.
- StringMap column_name_map;
- ParseMap(line, idx, &column_name_map);
-
- // Now that we have the list of columns, we put them into a flat array.
- // Rows will have value arrays of the same size, with indexes that
- // correspond to the columns array. As we insert each column into the
- // array, we also make an entry in columnMap so that we can look up the
- // index given the column id.
- columns_.reserve(column_name_map.size());
-
- for (StringMap::const_iterator i = column_name_map.begin();
- i != column_name_map.end(); ++i) {
- column_map[i->first] = static_cast<int>(columns_.size());
- MorkColumn col(i->first, i->second);
- columns_.push_back(col);
- }
- } else if (StartsWithASCII(&line[idx], "<(", true)) {
- // Value map.
- ParseMap(line, idx, &value_map_);
- } else if (line[idx] == '{' || line[idx] == '[') {
- // Table / table row.
- ParseTable(line, idx, &column_map);
- } else {
- // Don't know, hopefully don't care.
- }
- }
- return true;
-}
-
-// Parses a key/value map of the form
-// <(k1=v1)(k2=v2)...>
-bool MorkReader::ParseMap(const std::string& first_line,
- size_t start_index,
- StringMap* map) {
- // If the first line is the a=c line (column map), just skip over it.
- std::string line(first_line);
- if (StartsWithASCII(line, "< <(a=c)>", true))
- ReadLine(&line);
-
- std::string key;
- do {
- size_t idx = start_index;
- size_t len = line.size();
- size_t token_start;
-
- while (idx < len) {
- switch (line[idx++]) {
- case '(':
- // Beginning of a key/value pair.
- if (!key.empty()) {
- DLOG(WARNING) << "unterminated key/value pair?";
- key.clear();
- }
-
- token_start = idx;
- while (idx < len && line[idx] != '=')
- ++idx;
- key.assign(&line[token_start], idx - token_start);
- break;
-
- case '=': {
- // Beginning of the value.
- if (key.empty()) {
- DLOG(WARNING) << "stray value";
- break;
- }
-
- token_start = idx;
- while (idx < len && line[idx] != ')') {
- if (line[idx] == '\\')
- ++idx; // Skip escaped ')' characters.
- ++idx;
- }
- size_t token_end = std::min(idx, len);
- ++idx;
-
- std::string value = MorkUnescape(
- std::string(&line[token_start], token_end - token_start));
- (*map)[key] = value;
- key.clear();
- break;
- }
- case '>':
- // End of the map.
- DLOG_IF(WARNING, key.empty()) <<
- "map terminates inside of key/value pair";
- return true;
- }
- }
-
- // We should start reading the next line at the beginning.
- start_index = 0;
- } while (ReadLine(&line));
-
- // We ran out of lines and the map never terminated. This probably indicates
- // a parsing error.
- DLOG(WARNING) << "didn't find end of key/value map";
- return false;
-}
-
-// Parses a table row of the form [123(^45^67)..]
-// (row id 123 has the value with id 67 for the column with id 45).
-// A '^' prefix for a column or value references an entry in the column or
-// value map. '=' is used as the separator when the value is a literal.
-void MorkReader::ParseTable(const std::string& first_line,
- size_t start_index,
- const IndexMap* column_map) {
- std::string line(first_line);
-
- // Column index of the cell we're parsing, minus one if invalid.
- int column_index = -1;
-
- // Points to the current row we're parsing inside of the |table_|, will be
- // NULL if we're not inside a row.
- ColumnDataList* current_row = NULL;
-
- bool in_meta_row = false;
-
- do {
- size_t idx = start_index;
- size_t len = line.size();
-
- while (idx < len) {
- switch (line[idx++]) {
- case '{':
- // This marks the beginning of a table section. There's a lot of
- // junk before the first row that looks like cell values but isn't.
- // Skip to the first '['.
- while (idx < len && line[idx] != '[') {
- if (line[idx] == '{') {
- in_meta_row = true; // The meta row is enclosed in { }
- } else if (line[idx] == '}') {
- in_meta_row = false;
- }
- ++idx;
- }
- break;
-
- case '[': {
- // Start of a new row. Consume the row id, up to the first '('.
- // Row edits also have a table namespace, separated from the row id
- // by a colon. We don't make use of the namespace, but we need to
- // make sure not to consider it part of the row id.
- if (current_row) {
- DLOG(WARNING) << "unterminated row?";
- current_row = NULL;
- }
-
- // Check for a '-' at the start of the id. This signifies that
- // if the row already exists, we should delete all columns from it
- // before adding the new values.
- bool cut_columns;
- if (idx < len && line[idx] == '-') {
- cut_columns = true;
- ++idx;
- } else {
- cut_columns = false;
- }
-
- // Locate the range of the ID.
- size_t token_start = idx; // Index of the first char of the token.
- while (idx < len &&
- line[idx] != '(' &&
- line[idx] != ']' &&
- line[idx] != ':') {
- ++idx;
- }
- size_t token_end = idx; // Index of the char following the token.
- while (idx < len && line[idx] != '(' && line[idx] != ']') {
- ++idx;
- }
-
- if (in_meta_row) {
- // Need to create the meta row.
- meta_row_.resize(columns_.size());
- current_row = &meta_row_;
- } else {
- // Find or create the regular row for this.
- IDString row_id(&line[token_start], token_end - token_start);
- RowMap::iterator found_row = table_.find(row_id);
- if (found_row == table_.end()) {
- // We don't already have this row, create a new one for it.
- current_row = new ColumnDataList(columns_.size());
- table_[row_id] = current_row;
- } else {
- // The row already exists and we're adding/replacing things.
- current_row = found_row->second;
- }
- }
- if (cut_columns) {
- for (size_t i = 0; i < current_row->size(); ++i)
- (*current_row)[i].clear();
- }
- break;
- }
-
- case ']':
- // We're done with the row.
- current_row = NULL;
- in_meta_row = false;
- break;
-
- case '(': {
- if (!current_row) {
- DLOG(WARNING) << "cell value outside of row";
- break;
- }
-
- bool column_is_atom;
- if (line[idx] == '^') {
- column_is_atom = true;
- ++idx; // This is not part of the column id, advance past it.
- } else {
- column_is_atom = false;
- }
- size_t token_start = idx;
- while (idx < len && line[idx] != '^' && line[idx] != '=') {
- if (line[idx] == '\\')
- ++idx; // Skip escaped characters.
- ++idx;
- }
-
- size_t token_end = std::min(idx, len);
-
- IDString column;
- if (column_is_atom)
- column.assign(&line[token_start], token_end - token_start);
- else
- column = MorkUnescape(line.substr(token_start,
- token_end - token_start));
-
- IndexMap::const_iterator found_column = column_map->find(column);
- if (found_column == column_map->end()) {
- DLOG(WARNING) << "Column not in column map, discarding it";
- column_index = -1;
- } else {
- column_index = found_column->second;
- }
- break;
- }
-
- case '=':
- case '^': {
- if (column_index == -1) {
- DLOG(WARNING) << "stray ^ or = marker";
- break;
- }
-
- bool value_is_atom = (line[idx - 1] == '^');
- size_t token_start = idx - 1; // Include the '=' or '^' marker.
- while (idx < len && line[idx] != ')') {
- if (line[idx] == '\\')
- ++idx; // Skip escaped characters.
- ++idx;
- }
- size_t token_end = std::min(idx, len);
- ++idx;
-
- if (value_is_atom) {
- (*current_row)[column_index].assign(&line[token_start],
- token_end - token_start);
- } else {
- (*current_row)[column_index] =
- MorkUnescape(line.substr(token_start, token_end - token_start));
- }
- column_index = -1;
- }
- break;
- }
- }
-
- // Start parsing the next line at the beginning.
- start_index = 0;
- } while (current_row && ReadLine(&line));
-}
-
-bool MorkReader::ReadLine(std::string* line) {
- line->resize(256);
- std::getline(stream_, *line);
- if (stream_.eof() || stream_.bad())
- return false;
-
- while (!line->empty() && (*line)[line->size() - 1] == '\\') {
- // There is a continuation for this line. Read it and append.
- std::string new_line;
- std::getline(stream_, new_line);
- if (stream_.eof())
- return false;
- line->erase(line->size() - 1);
- line->append(new_line);
- }
-
- return true;
-}
-
-void MorkReader::NormalizeValue(std::string* value) const {
- if (value->empty())
- return;
- MorkReader::StringMap::const_iterator i;
- switch (value->at(0)) {
- case '^':
- // Hex ID, lookup the name for it in the |value_map_|.
- i = value_map_.find(value->substr(1));
- if (i == value_map_.end())
- value->clear();
- else
- *value = i->second;
- break;
- case '=':
- // Just use the literal after the equals sign.
- value->erase(value->begin());
- break;
- default:
- // Anything else is invalid.
- value->clear();
- break;
- }
-}
-
-// Source:
-// http://mxr.mozilla.org/firefox/source/toolkit/components/places/src/nsMorkHistoryImporter.cpp
-
// Columns for entry (non-meta) history rows. The enumerators double as
// indexes into |gColumnNames| below, so the two lists must stay in the same
// order.
enum {
  kURLColumn,
  kNameColumn,
  kVisitCountColumn,
  kHiddenColumn,
  kTypedColumn,
  kLastVisitColumn,
  kColumnCount  // Keep me last.
};

// Mork column names, indexed by the enum above.
static const char * const gColumnNames[] = {
  "URL", "Name", "VisitCount", "Hidden", "Typed", "LastVisitDate"
};

// Compile-time check that |gColumnNames| has exactly one entry per column
// enumerator: the array type below gets a negative size (a compile error) if
// the two ever drift apart.
typedef char ColumnNamesMustMatchColumnEnum[
    (sizeof(gColumnNames) / sizeof(gColumnNames[0]) ==
     static_cast<size_t>(kColumnCount)) ? 1 : -1];
-
-struct TableReadClosure {
- explicit TableReadClosure(const MorkReader& r)
- : reader(r),
- swap_bytes(false),
- byte_order_column(-1) {
- for (int i = 0; i < kColumnCount; ++i)
- column_indexes[i] = -1;
- }
-
- // Backpointers to the reader and history we're operating on.
- const MorkReader& reader;
-
- // Whether we need to swap bytes (file format is other-endian).
- bool swap_bytes;
-
- // Indexes of the columns that we care about.
- int column_indexes[kColumnCount];
- int byte_order_column;
-};
-
-void AddToHistory(MorkReader::ColumnDataList* column_values,
- const TableReadClosure& data,
- std::vector<history::URLRow>* rows) {
- std::string values[kColumnCount];
-
- for (size_t i = 0; i < kColumnCount; ++i) {
- if (data.column_indexes[i] != -1) {
- values[i] = column_values->at(data.column_indexes[i]);
- data.reader.NormalizeValue(&values[i]);
- // Do not import hidden records.
- if (i == kHiddenColumn && values[i] == "1")
- return;
- }
- }
-
- GURL url(values[kURLColumn]);
-
- if (CanImportURL(url)) {
- history::URLRow row(url);
-
- // title is really a UTF-16 string at this point
- std::wstring title;
- if (data.swap_bytes) {
- CodepageToWide(values[kNameColumn], "UTF-16BE",
- OnStringUtilConversionError::SKIP, &title);
- } else {
- CodepageToWide(values[kNameColumn], "UTF-16LE",
- OnStringUtilConversionError::SKIP, &title);
- }
- row.set_title(title);
-
- int count = atoi(values[kVisitCountColumn].c_str());
- if (count == 0)
- count = 1;
- row.set_visit_count(count);
-
- time_t date = StringToInt64(values[kLastVisitColumn]);
- if (date != 0)
- row.set_last_visit(Time::FromTimeT(date/1000000));
-
- bool is_typed = (values[kTypedColumn] == "1");
- if (is_typed)
- row.set_typed_count(1);
-
- rows->push_back(row);
- }
-}
-
-// It sets up the file stream and loops over the lines in the file to
-// parse them, then adds the resulting row set to history.
-void ImportHistoryFromFirefox2(std::wstring file, MessageLoop* loop,
- ProfileWriter* writer) {
- MorkReader reader;
- reader.Read(file);
-
- // Gather up the column ids so we don't need to find them on each row
- TableReadClosure data(reader);
- const MorkReader::MorkColumnList& columns = reader.columns();
- for (size_t i = 0; i < columns.size(); ++i) {
- for (int j = 0; j < kColumnCount; ++j)
- if (columns[i].name == gColumnNames[j]) {
- data.column_indexes[j] = static_cast<int>(i);
- break;
- }
- if (columns[i].name == "ByteOrder")
- data.byte_order_column = static_cast<int>(i);
- }
-
- // Determine the byte order from the table's meta-row.
- const MorkReader::ColumnDataList& meta_row = reader.meta_row();
- if (!meta_row.empty() && data.byte_order_column != -1) {
- std::string byte_order = meta_row[data.byte_order_column];
- if (!byte_order.empty()) {
- // Note whether the file uses a non-native byte ordering.
- // If it does, we'll have to swap bytes for PRUnichar values.
- // "BE" and "LE" are the only recognized values, anything
- // else is garbage and the file will be treated as native-endian
- // (no swapping).
- std::string byte_order_value(byte_order);
- reader.NormalizeValue(&byte_order_value);
- data.swap_bytes = (byte_order_value == "BE");
- }
- }
-
- std::vector<history::URLRow> rows;
- for (MorkReader::iterator i = reader.begin(); i != reader.end(); ++i)
- AddToHistory(i->second, data, &rows);
- if (!rows.empty())
- loop->PostTask(FROM_HERE, NewRunnableMethod(writer,
- &ProfileWriter::AddHistoryPage, rows));
-}