diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-28 17:00:49 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-28 17:00:49 +0000 |
commit | 1d799ddcfcf7925b8fb8aeb6070f33eb5c4e0826 (patch) | |
tree | f0bd0656066f48929f2f82c5dfc66dd45804c133 /net/ftp | |
parent | 792785dd30f0ac1a7fc0dbf0f56e975795170feb (diff) | |
download | chromium_src-1d799ddcfcf7925b8fb8aeb6070f33eb5c4e0826.zip chromium_src-1d799ddcfcf7925b8fb8aeb6070f33eb5c4e0826.tar.gz chromium_src-1d799ddcfcf7925b8fb8aeb6070f33eb5c4e0826.tar.bz2 |
First parts of new FTP LIST response parsing code.
Added parser for "ls" listing style, and tests. This is not yet used by the browser (will do that in a following CL).
TEST=Covered by net_unittests.
BUG=25520
Review URL: http://codereview.chromium.org/244008
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@30354 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/ftp')
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.cc | 134 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer.h | 77 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_buffer_unittest.cc | 101 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parsers.cc | 153 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parsers.h | 63 | ||||
-rw-r--r-- | net/ftp/ftp_directory_listing_parsers_unittest.cc | 93 |
6 files changed, 621 insertions, 0 deletions
diff --git a/net/ftp/ftp_directory_listing_buffer.cc b/net/ftp/ftp_directory_listing_buffer.cc
new file mode 100644
index 0000000..0099e91
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_buffer.cc
@@ -0,0 +1,147 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
+// source code is governed by a BSD-style license that can be found in the
+// LICENSE file.
+
+#include "net/ftp/ftp_directory_listing_buffer.h"
+
+#include "base/i18n/icu_string_conversions.h"
+#include "base/stl_util-inl.h"
+#include "base/string_util.h"
+#include "net/base/net_errors.h"
+#include "net/ftp/ftp_directory_listing_parsers.h"
+#include "unicode/ucsdet.h"
+
+namespace {
+
+// A very simple-minded character encoding detection. Returns the name of the
+// charset that ICU considers the best match for |text|, or an empty string if
+// |text| is pure ASCII or the detection fails.
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
+// like TLD, the UI language/default encoding of a client, etc). In that case,
+// this should be pulled out of here and moved somewhere in base because there
+// can be other use cases.
+std::string DetectEncoding(const std::string& text) {
+  if (IsStringASCII(text))
+    return std::string();
+  UErrorCode status = U_ZERO_ERROR;
+  UCharsetDetector* detector = ucsdet_open(&status);
+  ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
+                 &status);
+  const UCharsetMatch* match = ucsdet_detect(detector, &status);
+  const char* encoding = ucsdet_getName(match, &status);
+  // Should we check the quality of the match? A rather arbitrary number is
+  // assigned by ICU and it's hard to come up with a lower limit.
+  std::string result;
+  if (U_SUCCESS(status) && encoding)
+    result = encoding;
+  // |encoding| is owned by |match|, which is owned by |detector|, so it must
+  // have been copied by the time the detector is released.
+  ucsdet_close(detector);
+  return result;
+}
+
+}  // namespace
+
+namespace net {
+
+FtpDirectoryListingBuffer::FtpDirectoryListingBuffer()
+    : current_parser_(NULL) {
+  parsers_.insert(new FtpLsDirectoryListingParser());
+}
+
+FtpDirectoryListingBuffer::~FtpDirectoryListingBuffer() {
+  STLDeleteElements(&parsers_);
+}
+
+int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) {
+  buffer_.append(data, data_length);
+
+  // Lines are only extracted once the encoding is known or more than 1024
+  // bytes are buffered; ProcessRemainingData() handles whatever is left.
+  if (!encoding_.empty() || buffer_.length() > 1024) {
+    int rv = ExtractFullLinesFromBuffer();
+    if (rv != OK)
+      return rv;
+  }
+
+  return ParseLines();
+}
+
+int FtpDirectoryListingBuffer::ProcessRemainingData() {
+  int rv = ExtractFullLinesFromBuffer();
+  if (rv != OK)
+    return rv;
+
+  return ParseLines();
+}
+
+bool FtpDirectoryListingBuffer::EntryAvailable() const {
+  return (current_parser_ ? current_parser_->EntryAvailable() : false);
+}
+
+FtpDirectoryListingEntry FtpDirectoryListingBuffer::PopEntry() {
+  DCHECK(EntryAvailable());
+  return current_parser_->PopEntry();
+}
+
+bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding(
+    const std::string& from, string16* to) {
+  std::string encoding(encoding_.empty() ? "ascii" : encoding_);
+  return base::CodepageToUTF16(from, encoding.c_str(),
+                               base::OnStringConversionError::FAIL, to);
+}
+
+int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() {
+  if (encoding_.empty())
+    encoding_ = DetectEncoding(buffer_);
+
+  // Offset of the first byte that has not been turned into a line yet.
+  size_t cut_pos = 0;
+  for (size_t i = 0; i < buffer_.length(); ++i) {
+    if (i >= 1 && buffer_[i - 1] == '\r' && buffer_[i] == '\n') {
+      // The line spans [cut_pos, i - 1), i.e. it excludes the CRLF itself.
+      std::string line(buffer_.substr(cut_pos, i - cut_pos - 1));
+      cut_pos = i + 1;
+      string16 line_converted;
+      if (!ConvertToDetectedEncoding(line, &line_converted)) {
+        buffer_.erase(0, cut_pos);
+        return ERR_ENCODING_CONVERSION_FAILED;
+      }
+      lines_.push_back(line_converted);
+    }
+  }
+  buffer_.erase(0, cut_pos);
+  return OK;
+}
+
+int FtpDirectoryListingBuffer::ParseLines() {
+  while (!lines_.empty()) {
+    string16 line = lines_.front();
+    lines_.pop_front();
+    if (current_parser_) {
+      if (!current_parser_->ConsumeLine(line))
+        return ERR_FAILED;
+    } else {
+      // The format is not known yet: offer the line to every remaining
+      // candidate parser and discard those that reject it.
+      ParserSet::iterator i = parsers_.begin();
+      while (i != parsers_.end()) {
+        if ((*i)->ConsumeLine(line)) {
+          i++;
+        } else {
+          delete *i;
+          // Post-increment so |i| moves on before its element is erased.
+          parsers_.erase(i++);
+        }
+      }
+      if (parsers_.empty())
+        return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
+      if (parsers_.size() == 1)
+        current_parser_ = *parsers_.begin();
+    }
+  }
+
+  return OK;
+}
+
+}  // namespace net
diff --git a/net/ftp/ftp_directory_listing_buffer.h b/net/ftp/ftp_directory_listing_buffer.h
new file mode 100644
index 0000000..eaa237e
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_buffer.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
+// source code is governed by a BSD-style license that can be found in the
+// LICENSE file.
+
+#ifndef NET_FTP_FTP_DIRECTORY_LISTING_BUFFER_H_
+#define NET_FTP_FTP_DIRECTORY_LISTING_BUFFER_H_
+
+#include <deque>
+#include <set>
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/string16.h"
+#include "base/time.h"
+
+namespace net {
+
+struct FtpDirectoryListingEntry;
+class FtpDirectoryListingParser;
+
+class FtpDirectoryListingBuffer {
+ public:
+  FtpDirectoryListingBuffer();
+
+  ~FtpDirectoryListingBuffer();
+
+  // Called when data is received from the data socket. Returns network
+  // error code.
+  int ConsumeData(const char* data, int data_length);
+
+  // Called when all received data has been consumed by this buffer. Tells the
+  // buffer to try to parse remaining raw data and returns network error code.
+  int ProcessRemainingData();
+
+  bool EntryAvailable() const;
+
+  // Returns the next entry. It is an error to call this function
+  // unless EntryAvailable returns true.
+  FtpDirectoryListingEntry PopEntry();
+
+ private:
+  typedef std::set<FtpDirectoryListingParser*> ParserSet;
+
+  // Converts |from| from the detected encoding to UTF-16 and stores the result
+  // in |to|. Returns true on success.
+  bool ConvertToDetectedEncoding(const std::string& from, string16* to);
+
+  // Tries to extract full lines from the raw buffer, converting them from the
+  // detected encoding to UTF-16. Returns network error code.
+  int ExtractFullLinesFromBuffer();
+
+  // Tries to parse full lines stored in |lines_|. Returns network error code.
+  int ParseLines();
+
+  // Detected encoding of the response (empty if unknown or ASCII).
+  std::string encoding_;
+
+  // Buffer to keep not-yet-split data.
+  std::string buffer_;
+
+  // CRLF-delimited lines, without the CRLF, not yet consumed by parser.
+  std::deque<string16> lines_;
+
+  // A collection of parsers for different listing styles. The parsers are owned
+  // by this FtpDirectoryListingBuffer.
+  ParserSet parsers_;
+
+  // When we're sure about the listing format, its parser is stored in
+  // |current_parser_|.
+  FtpDirectoryListingParser* current_parser_;
+
+  DISALLOW_COPY_AND_ASSIGN(FtpDirectoryListingBuffer);
+};
+
+}  // namespace net
+
+#endif  // NET_FTP_FTP_DIRECTORY_LISTING_BUFFER_H_
diff --git a/net/ftp/ftp_directory_listing_buffer_unittest.cc b/net/ftp/ftp_directory_listing_buffer_unittest.cc
new file mode 100644
index 0000000..a98e412
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_buffer_unittest.cc
@@ -0,0 +1,106 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "net/ftp/ftp_directory_listing_buffer.h"
+
+#include <vector>
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/string_tokenizer.h"
+#include "base/string_util.h"
+#include "net/base/net_errors.h"
+#include "net/ftp/ftp_directory_listing_parsers.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+TEST(FtpDirectoryListingBufferTest, Parse) {
+  const char* test_files[] = {
+    "dir-listing-ls-1",
+    "dir-listing-ls-2",
+  };
+
+  FilePath test_dir;
+  PathService::Get(base::DIR_SOURCE_ROOT, &test_dir);
+  test_dir = test_dir.AppendASCII("net");
+  test_dir = test_dir.AppendASCII("data");
+  test_dir = test_dir.AppendASCII("ftp");
+
+  for (size_t i = 0; i < arraysize(test_files); i++) {
+    SCOPED_TRACE(StringPrintf("Test[%d]: %s", static_cast<int>(i),
+                              test_files[i]));
+
+    net::FtpDirectoryListingBuffer buffer;
+
+    std::string test_listing;
+    EXPECT_TRUE(file_util::ReadFileToString(test_dir.AppendASCII(test_files[i]),
+                                            &test_listing));
+
+    EXPECT_EQ(net::OK, buffer.ConsumeData(test_listing.data(),
+                                          test_listing.length()));
+    EXPECT_EQ(net::OK, buffer.ProcessRemainingData());
+
+    std::string expected_listing;
+    ASSERT_TRUE(file_util::ReadFileToString(
+        test_dir.AppendASCII(std::string(test_files[i]) + ".expected"),
+        &expected_listing));
+
+    // The gold file describes every expected entry with 7 lines: type, name,
+    // year (or "current"), month, day of month, hour and minute.
+    std::vector<std::string> lines;
+    StringTokenizer tokenizer(expected_listing, "\r\n");
+    while (tokenizer.GetNext())
+      lines.push_back(tokenizer.token());
+    ASSERT_EQ(0U, lines.size() % 7);
+
+    for (size_t j = 0; j < lines.size() / 7; j++) {
+      std::string type(lines[7 * j]);
+      std::string name(lines[7 * j + 1]);
+
+      SCOPED_TRACE(StringPrintf("Filename: %s", name.c_str()));
+
+      int year;
+      if (lines[7 * j + 2] == "current") {
+        base::Time::Exploded now_exploded;
+        base::Time::Now().LocalExplode(&now_exploded);
+        year = now_exploded.year;
+      } else {
+        year = StringToInt(lines[7 * j + 2]);
+      }
+      int month = StringToInt(lines[7 * j + 3]);
+      int day_of_month = StringToInt(lines[7 * j + 4]);
+      int hour = StringToInt(lines[7 * j + 5]);
+      int minute = StringToInt(lines[7 * j + 6]);
+
+      ASSERT_TRUE(buffer.EntryAvailable());
+      net::FtpDirectoryListingEntry entry = buffer.PopEntry();
+
+      if (type == "d") {
+        EXPECT_EQ(net::FtpDirectoryListingEntry::DIRECTORY, entry.type);
+      } else if (type == "-") {
+        EXPECT_EQ(net::FtpDirectoryListingEntry::FILE, entry.type);
+      } else if (type == "l") {
+        EXPECT_EQ(net::FtpDirectoryListingEntry::SYMLINK, entry.type);
+      } else {
+        ADD_FAILURE() << "invalid gold test data: " << type;
+      }
+
+      EXPECT_EQ(UTF8ToUTF16(name), entry.name);
+
+      base::Time::Exploded time_exploded;
+      entry.last_modified.LocalExplode(&time_exploded);
+      EXPECT_EQ(year, time_exploded.year);
+      EXPECT_EQ(month, time_exploded.month);
+      EXPECT_EQ(day_of_month, time_exploded.day_of_month);
+      EXPECT_EQ(hour, time_exploded.hour);
+      EXPECT_EQ(minute, time_exploded.minute);
+      EXPECT_EQ(0, time_exploded.second);
+      EXPECT_EQ(0, time_exploded.millisecond);
+    }
+    EXPECT_FALSE(buffer.EntryAvailable());
+  }
+}
+
+}  // namespace
diff --git a/net/ftp/ftp_directory_listing_parsers.cc b/net/ftp/ftp_directory_listing_parsers.cc
new file mode 100644
index 0000000..c1a7f76
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_parsers.cc
@@ -0,0 +1,171 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
+// source code is governed by a BSD-style license that can be found in the
+// LICENSE file.
+
+#include "net/ftp/ftp_directory_listing_parsers.h"
+
+#include <vector>
+
+#include "base/string_util.h"
+
+namespace {
+
+// Returns true if |text| looks like one "rwx" permission triplet. The last
+// character may also be s/S (setuid/setgid) or t/T (sticky bit).
+bool LooksLikeUnixPermission(const string16& text) {
+  if (text.length() != 3)
+    return false;
+
+  return ((text[0] == 'r' || text[0] == '-') &&
+          (text[1] == 'w' || text[1] == '-') &&
+          (text[2] == 'x' || text[2] == 's' || text[2] == 'S' ||
+           text[2] == 't' || text[2] == 'T' || text[2] == '-'));
+}
+
+// Returns true if |text| looks like the 10-character mode column of "ls -l":
+// a file type character followed by three permission triplets.
+bool LooksLikeUnixPermissionsListing(const string16& text) {
+  if (text.length() != 10)
+    return false;
+
+  if (text[0] != 'b' && text[0] != 'c' && text[0] != 'd' &&
+      text[0] != 'l' && text[0] != 'p' && text[0] != 's' &&
+      text[0] != '-')
+    return false;
+
+  return (LooksLikeUnixPermission(text.substr(1, 3)) &&
+          LooksLikeUnixPermission(text.substr(4, 3)) &&
+          LooksLikeUnixPermission(text.substr(7, 3)));
+}
+
+// Returns true if |text| parses as an integer that is not negative.
+bool IsStringNonNegativeNumber(const string16& text) {
+  int number;
+  if (!StringToInt(text, &number))
+    return false;
+
+  return number >= 0;
+}
+
+// Converts a three-letter English month abbreviation (compared
+// case-insensitively) to its number, 1-12. Returns false if |text| is not one.
+bool ThreeLetterMonthToNumber(const string16& text, int* number) {
+  static const char* const months[] = {
+    "jan", "feb", "mar", "apr", "may", "jun",
+    "jul", "aug", "sep", "oct", "nov", "dec"
+  };
+
+  for (size_t i = 0; i < arraysize(months); i++) {
+    if (LowerCaseEqualsASCII(text, months[i])) {
+      *number = static_cast<int>(i + 1);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Converts the date columns (5 to 7) of a listing line that was split into 9
+// |columns| to |time|. Column 7 holds either a year or a time of day (HH:MM);
+// in the latter case the current year is used. Returns false if the columns
+// cannot be parsed.
+bool UnixDateListingToTime(const std::vector<string16>& columns,
+                           base::Time* time) {
+  DCHECK_EQ(9U, columns.size());
+
+  base::Time::Exploded time_exploded = { 0 };
+
+  if (!ThreeLetterMonthToNumber(columns[5], &time_exploded.month))
+    return false;
+
+  if (!StringToInt(columns[6], &time_exploded.day_of_month))
+    return false;
+
+  if (!StringToInt(columns[7], &time_exploded.year)) {
+    // Maybe it's time. Does it look like time (HH:MM)?
+    if (columns[7].length() != 5 || columns[7][2] != ':')
+      return false;
+
+    if (!StringToInt(columns[7].substr(0, 2), &time_exploded.hour))
+      return false;
+
+    if (!StringToInt(columns[7].substr(3, 2), &time_exploded.minute))
+      return false;
+
+    // Use current year.
+    base::Time::Exploded now_exploded;
+    base::Time::Now().LocalExplode(&now_exploded);
+    time_exploded.year = now_exploded.year;
+  }
+
+  // We don't know the time zone of the server, so just use local time.
+  *time = base::Time::FromLocalExploded(time_exploded);
+  return true;
+}
+
+}  // namespace
+
+namespace net {
+
+FtpDirectoryListingParser::~FtpDirectoryListingParser() {
+}
+
+FtpLsDirectoryListingParser::FtpLsDirectoryListingParser() {
+}
+
+bool FtpLsDirectoryListingParser::ConsumeLine(const string16& line) {
+  // Expected columns, as printed by "ls -l": permissions, link count, owner,
+  // group, size, month, day of month, year or time, name. Symlinks add "->"
+  // and the target. Names containing spaces are rejected by the size checks.
+  std::vector<string16> columns;
+  SplitString(CollapseWhitespace(line, false), ' ', &columns);
+  if (columns.size() == 11) {
+    // Check if it is a symlink.
+    if (columns[9] != ASCIIToUTF16("->"))
+      return false;
+
+    // Drop the symlink target from columns, we don't use it.
+    columns.resize(9);
+  }
+
+  if (columns.size() != 9)
+    return false;
+
+  if (!LooksLikeUnixPermissionsListing(columns[0]))
+    return false;
+
+  FtpDirectoryListingEntry entry;
+  if (columns[0][0] == 'l') {
+    entry.type = FtpDirectoryListingEntry::SYMLINK;
+  } else if (columns[0][0] == 'd') {
+    entry.type = FtpDirectoryListingEntry::DIRECTORY;
+  } else {
+    entry.type = FtpDirectoryListingEntry::FILE;
+  }
+
+  if (!IsStringNonNegativeNumber(columns[1]))
+    return false;
+
+  if (!IsStringNonNegativeNumber(columns[4]))
+    return false;
+
+  if (!UnixDateListingToTime(columns, &entry.last_modified))
+    return false;
+
+  entry.name = columns[8];
+
+  entries_.push(entry);
+  return true;
+}
+
+bool FtpLsDirectoryListingParser::EntryAvailable() const {
+  return !entries_.empty();
+}
+
+FtpDirectoryListingEntry FtpLsDirectoryListingParser::PopEntry() {
+  FtpDirectoryListingEntry entry = entries_.front();
+  entries_.pop();
+  return entry;
+}
+
+}  // namespace net
diff --git a/net/ftp/ftp_directory_listing_parsers.h b/net/ftp/ftp_directory_listing_parsers.h
new file mode 100644
index 0000000..4fda042
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_parsers.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
+// source code is governed by a BSD-style license that can be found in the
+// LICENSE file.
+
+#ifndef NET_FTP_FTP_DIRECTORY_LISTING_PARSERS_H_
+#define NET_FTP_FTP_DIRECTORY_LISTING_PARSERS_H_
+
+#include <queue>
+
+#include "base/basictypes.h"
+#include "base/string16.h"
+#include "base/time.h"
+#include "net/ftp/ftp_server_type_histograms.h"
+
+namespace net {
+
+// A single entry of a parsed FTP directory listing.
+struct FtpDirectoryListingEntry {
+  enum Type {
+    FILE,
+    DIRECTORY,
+    SYMLINK,
+  };
+
+  Type type;
+  string16 name;
+
+  // Last modified time, in local time zone.
+  base::Time last_modified;
+};
+
+// Interface of a parser for one directory listing style. Lines are fed in one
+// at a time with ConsumeLine(); parsed entries are retrieved with PopEntry().
+class FtpDirectoryListingParser {
+ public:
+  virtual ~FtpDirectoryListingParser();
+
+  // Adds |line| to the internal parsing buffer. Returns true on success.
+  virtual bool ConsumeLine(const string16& line) = 0;
+
+  // Returns true if there is at least one FtpDirectoryListingEntry available.
+  virtual bool EntryAvailable() const = 0;
+
+  // Returns the next entry. It is an error to call this function unless
+  // EntryAvailable returns true.
+  virtual FtpDirectoryListingEntry PopEntry() = 0;
+};
+
+// Parser for the Unix "ls" listing style.
+class FtpLsDirectoryListingParser : public FtpDirectoryListingParser {
+ public:
+  FtpLsDirectoryListingParser();
+
+  // FtpDirectoryListingParser methods:
+  virtual bool ConsumeLine(const string16& line);
+  virtual bool EntryAvailable() const;
+  virtual FtpDirectoryListingEntry PopEntry();
+
+ private:
+  std::queue<FtpDirectoryListingEntry> entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(FtpLsDirectoryListingParser);
+};
+
+}  // namespace net
+
+#endif  // NET_FTP_FTP_DIRECTORY_LISTING_PARSERS_H_
diff --git a/net/ftp/ftp_directory_listing_parsers_unittest.cc b/net/ftp/ftp_directory_listing_parsers_unittest.cc
new file mode 100644
index 0000000..e45968a
--- /dev/null
+++ b/net/ftp/ftp_directory_listing_parsers_unittest.cc
@@ -0,0 +1,93 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "net/ftp/ftp_directory_listing_parsers.h"
+
+#include "base/string_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// A listing line and the entry fields it is expected to be parsed into.
+struct SingleLineTestData {
+  const char* input;
+  net::FtpDirectoryListingEntry::Type type;
+  const char* filename;
+  int year;
+  int month;
+  int day_of_month;
+  int hour;
+  int minute;
+};
+
+class FtpDirectoryListingParsersTest : public testing::Test {
+ protected:
+  FtpDirectoryListingParsersTest() {}
+
+  void RunSingleLineTestCase(net::FtpDirectoryListingParser* parser,
+                             const SingleLineTestData& test_case) {
+    ASSERT_TRUE(parser->ConsumeLine(UTF8ToUTF16(test_case.input)));
+    ASSERT_TRUE(parser->EntryAvailable());
+    net::FtpDirectoryListingEntry entry = parser->PopEntry();
+    EXPECT_EQ(test_case.type, entry.type);
+    EXPECT_EQ(UTF8ToUTF16(test_case.filename), entry.name);
+
+    base::Time::Exploded time_exploded;
+    entry.last_modified.LocalExplode(&time_exploded);
+    EXPECT_EQ(test_case.year, time_exploded.year);
+    EXPECT_EQ(test_case.month, time_exploded.month);
+    EXPECT_EQ(test_case.day_of_month, time_exploded.day_of_month);
+    EXPECT_EQ(test_case.hour, time_exploded.hour);
+    EXPECT_EQ(test_case.minute, time_exploded.minute);
+    EXPECT_EQ(0, time_exploded.second);
+    EXPECT_EQ(0, time_exploded.millisecond);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FtpDirectoryListingParsersTest);
+};
+
+TEST_F(FtpDirectoryListingParsersTest, Ls) {
+  base::Time::Exploded now_exploded;
+  base::Time::Now().LocalExplode(&now_exploded);
+
+  const struct SingleLineTestData good_cases[] = {
+    { "-rw-r--r-- 1 ftp ftp 528 Nov 01 2007 README",
+      net::FtpDirectoryListingEntry::FILE, "README",
+      2007, 11, 1, 0, 0 },
+    { "drwxr-xr-x 3 ftp ftp 4096 May 15 18:11 directory",
+      net::FtpDirectoryListingEntry::DIRECTORY, "directory",
+      now_exploded.year, 5, 15, 18, 11 },
+    { "lrwxrwxrwx 1 0 0 26 Sep 18 2008 pub -> vol/1/.CLUSTER/var_ftp/pub",
+      net::FtpDirectoryListingEntry::SYMLINK, "pub",
+      2008, 9, 18, 0, 0 },
+    { "lrwxrwxrwx 1 0 0 3 Oct 12 13:37 mirror -> pub",
+      net::FtpDirectoryListingEntry::SYMLINK, "mirror",
+      now_exploded.year, 10, 12, 13, 37 },
+  };
+  for (size_t i = 0; i < arraysize(good_cases); i++) {
+    SCOPED_TRACE(StringPrintf("Test[%d]: %s", static_cast<int>(i),
+                              good_cases[i].input));
+    net::FtpLsDirectoryListingParser parser;
+    RunSingleLineTestCase(&parser, good_cases[i]);
+  }
+
+  const char* bad_cases[] = {
+    "",
+    "garbage",
+    "-rw-r--r-- 1 ftp ftp",
+    "-rw-r--rgb 1 ftp ftp 528 Nov 01 2007 README",
+    "-rw-rgbr-- 1 ftp ftp 528 Nov 01 2007 README",
+    "qrwwr--r-- 1 ftp ftp 528 Nov 01 2007 README",
+    "-rw-r--r-- -1 ftp ftp 528 Nov 01 2007 README",
+    "-rw-r--r-- 1 ftp ftp -528 Nov 01 2007 README",
+    "-rw-r--r-- 1 ftp ftp 528 Foo 01 2007 README",
+  };
+  for (size_t i = 0; i < arraysize(bad_cases); i++) {
+    net::FtpLsDirectoryListingParser parser;
+    EXPECT_FALSE(parser.ConsumeLine(UTF8ToUTF16(bad_cases[i]))) << bad_cases[i];
+  }
+}
+
+}  // namespace |