// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Parse the data returned from the SafeBrowsing v2.1 protocol response. // TODOv3(shess): Review these changes carefully. #include #include "base/format_macros.h" #include "base/logging.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_split.h" #include "base/strings/stringprintf.h" #include "base/sys_byteorder.h" #include "base/time/time.h" #include "build/build_config.h" #include "chrome/browser/safe_browsing/protocol_parser.h" #include "chrome/browser/safe_browsing/safe_browsing_util.h" namespace { // Helper class for scanning a buffer. class BufferReader { public: BufferReader(const char* data, size_t length) : data_(data), length_(length) { } // Return info about remaining buffer data. size_t length() const { return length_; } const char* data() const { return data_; } bool empty() const { return length_ == 0; } // Remove |l| characters from the buffer. void Advance(size_t l) { DCHECK_LE(l, length()); data_ += l; length_ -= l; } // Get a reference to data in the buffer. // TODO(shess): I'm not sure I like this. Fill out a StringPiece instead? bool RefData(const void** pptr, size_t l) { if (length() < l) { Advance(length()); // poison return false; } *pptr = data(); Advance(l); return true; } // Copy data out of the buffer. bool GetData(void* ptr, size_t l) { const void* buf_ptr; if (!RefData(&buf_ptr, l)) return false; memcpy(ptr, buf_ptr, l); return true; } // Read a 32-bit integer in network byte order into a local uint32. bool GetNet32(uint32* i) { if (!GetData(i, sizeof(*i))) return false; *i = base::NetToHost32(*i); return true; } // Returns false if there is no data, otherwise fills |*line| with a reference // to the next line of data in the buffer. bool GetLine(base::StringPiece* line) { if (!length_) return false; // Find the end of the line, or the end of the input. size_t eol = 0; while (eol < length_ && data_[eol] != '\n') { ++eol; } line->set(data_, eol); Advance(eol); // Skip the newline if present. if (length_ && data_[0] == '\n') Advance(1); return true; } // Read out |c| colon-separated pieces from the next line. The resulting // pieces point into the original data buffer. bool GetPieces(size_t c, std::vector* pieces) { base::StringPiece line; if (!GetLine(&line)) return false; // Find the parts separated by ':'. while (pieces->size() + 1 < c) { size_t colon_ofs = line.find(':'); if (colon_ofs == base::StringPiece::npos) { Advance(length_); return false; } pieces->push_back(line.substr(0, colon_ofs)); line.remove_prefix(colon_ofs + 1); } // The last piece runs to the end of the line. pieces->push_back(line); return true; } private: const char* data_; size_t length_; DISALLOW_COPY_AND_ASSIGN(BufferReader); }; bool ParseGetHashMetadata(size_t hash_count, BufferReader* reader, std::vector* full_hashes) { for (size_t i = 0; i < hash_count; ++i) { base::StringPiece line; if (!reader->GetLine(&line)) return false; size_t meta_data_len; if (!base::StringToSizeT(line, &meta_data_len)) return false; const void* meta_data; if (!reader->RefData(&meta_data, meta_data_len)) return false; if (full_hashes) { (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign( reinterpret_cast(meta_data), meta_data_len); } } return true; } } // namespace namespace safe_browsing { // BODY = CACHELIFETIME LF HASHENTRY* EOF // CACHELIFETIME = DIGIT+ // HASHENTRY = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF // HASHDATA (METADATALEN LF METADATA)* // HASHSIZE = DIGIT+ # Length of each full hash // NUMRESPONSES = DIGIT+ # Number of full hashes in HASHDATA // HASHDATA = // METADATALEN = DIGIT+ // METADATA = bool ParseGetHash(const char* chunk_data, size_t chunk_len, base::TimeDelta* cache_lifetime, std::vector* full_hashes) { full_hashes->clear(); BufferReader reader(chunk_data, chunk_len); // Parse out cache lifetime. { base::StringPiece line; if (!reader.GetLine(&line)) return false; int64_t cache_lifetime_seconds; if (!base::StringToInt64(line, &cache_lifetime_seconds)) return false; // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden, // either. Maybe there should be a threshold involved. if (cache_lifetime_seconds < 0) return false; *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds); } while (!reader.empty()) { std::vector cmd_parts; if (!reader.GetPieces(3, &cmd_parts)) return false; SBFullHashResult full_hash; full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]); size_t hash_len; if (!base::StringToSizeT(cmd_parts[1], &hash_len)) return false; // TODO(shess): Is this possible? If not, why the length present? if (hash_len != sizeof(SBFullHash)) return false; // Metadata is indicated by an optional ":m" at the end of the line. bool has_metadata = false; base::StringPiece hash_count_string = cmd_parts[2]; size_t optional_colon = hash_count_string.find(':', 0); if (optional_colon != base::StringPiece::npos) { if (hash_count_string.substr(optional_colon) != ":m") return false; has_metadata = true; hash_count_string.remove_suffix(2); } size_t hash_count; if (!base::StringToSizeT(hash_count_string, &hash_count)) return false; if (hash_len * hash_count > reader.length()) return false; // Ignore hash results from lists we don't recognize. if (full_hash.list_id < 0) { reader.Advance(hash_len * hash_count); if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL)) return false; continue; } for (size_t i = 0; i < hash_count; ++i) { if (!reader.GetData(&full_hash.hash, hash_len)) return false; full_hashes->push_back(full_hash); } if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes)) return false; } return reader.empty(); } // BODY = HEADER LF PREFIXES EOF // HEADER = PREFIXSIZE ":" LENGTH // PREFIXSIZE = DIGIT+ # Size of each prefix in bytes // LENGTH = DIGIT+ # Size of PREFIXES in bytes std::string FormatGetHash(const std::vector& prefixes) { std::string request; request.append(base::Uint64ToString(sizeof(SBPrefix))); request.append(":"); request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size())); request.append("\n"); // SBPrefix values are read without concern for byte order, so write back the // same way. for (size_t i = 0; i < prefixes.size(); ++i) { request.append(reinterpret_cast(&prefixes[i]), sizeof(SBPrefix)); } return request; } bool ParseUpdate(const char* chunk_data, size_t chunk_len, size_t* next_update_sec, bool* reset, std::vector* deletes, std::vector* chunk_urls) { DCHECK(next_update_sec); DCHECK(deletes); DCHECK(chunk_urls); BufferReader reader(chunk_data, chunk_len); // Populated below. std::string list_name; while (!reader.empty()) { std::vector pieces; if (!reader.GetPieces(2, &pieces)) return false; base::StringPiece& command = pieces[0]; // Differentiate on the first character of the command (which is usually // only one character, with the exception of the 'ad' and 'sd' commands). switch (command[0]) { case 'a': case 's': { // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must // have also parsed the list name before getting here, or the add-del // or sub-del will have no context. if (list_name.empty() || (command != "ad" && command != "sd")) return false; SBChunkDelete chunk_delete; chunk_delete.is_sub_del = command[0] == 's'; StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del); chunk_delete.list_name = list_name; deletes->push_back(chunk_delete); break; } case 'i': // The line providing the name of the list (i.e. 'goog-phish-shavar'). list_name = pieces[1].as_string(); break; case 'n': // The line providing the next earliest time (in seconds) to re-query. if (!base::StringToSizeT(pieces[1], next_update_sec)) return false; break; case 'u': { ChunkUrl chunk_url; chunk_url.url = pieces[1].as_string(); // Skip the initial "u:". chunk_url.list_name = list_name; chunk_urls->push_back(chunk_url); break; } case 'r': if (pieces[1] != "pleasereset") return false; *reset = true; break; default: // According to the spec, we ignore commands we don't understand. // TODO(shess): Does this apply to r:unknown or n:not-integer? break; } } return true; } // BODY = (UINT32 CHUNKDATA)+ // UINT32 = Unsigned 32-bit integer in network byte order // CHUNKDATA = Encoded ChunkData protocol message bool ParseChunk(const char* data, size_t length, ScopedVector* chunks) { BufferReader reader(data, length); while (!reader.empty()) { uint32 l = 0; if (!reader.GetNet32(&l) || l == 0 || l > reader.length()) return false; const void* p = NULL; if (!reader.RefData(&p, l)) return false; scoped_ptr chunk(new SBChunkData()); if (!chunk->ParseFrom(reinterpret_cast(p), l)) return false; chunks->push_back(chunk.release()); } DCHECK(reader.empty()); return true; } // LIST = LISTNAME ";" LISTINFO (":" LISTINFO)* // LISTINFO = CHUNKTYPE ":" CHUNKLIST // CHUNKTYPE = "a" | "s" // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST] // NUMBER = DIGIT+ // RANGE = NUMBER "-" NUMBER std::string FormatList(const SBListChunkRanges& list) { std::string formatted_results = list.name; formatted_results.append(";"); if (!list.adds.empty()) formatted_results.append("a:").append(list.adds); if (!list.adds.empty() && !list.subs.empty()) formatted_results.append(":"); if (!list.subs.empty()) formatted_results.append("s:").append(list.subs); formatted_results.append("\n"); return formatted_results; } } // namespace safe_browsing