summaryrefslogtreecommitdiffstats
path: root/chromeos/process_proxy
diff options
context:
space:
mode:
authortbarzic@chromium.org <tbarzic@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-05-15 20:34:37 +0000
committertbarzic@chromium.org <tbarzic@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2014-05-15 20:34:37 +0000
commit65bbd9d2a88029d0700c64283937ae09d3090ade (patch)
treeb0cdc8af1d28d87ce12aaece06318e4f576d11e5 /chromeos/process_proxy
parent27b57935d1aff03999cbbb89ca7e5a912c453373 (diff)
downloadchromium_src-65bbd9d2a88029d0700c64283937ae09d3090ade.zip
chromium_src-65bbd9d2a88029d0700c64283937ae09d3090ade.tar.gz
chromium_src-65bbd9d2a88029d0700c64283937ae09d3090ade.tar.bz2
Improve process output watcher's handling of multi-byte UTF8 characters
Speciffically, handle case when a non ASCII UTF8 character is read partially from the process output. Instead of reporting read character bytes imediatelly, cache them until the rest of the character is read from the process output. BUG=278340 TEST=in crosh (see comment #1 in the bug): $python >>> print(u"\u20ac" * 10000) Verify a series of EURO signs is displayed (without "unknown" characters) Review URL: https://codereview.chromium.org/261743002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@270799 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chromeos/process_proxy')
-rw-r--r--chromeos/process_proxy/process_output_watcher.cc104
-rw-r--r--chromeos/process_proxy/process_output_watcher.h24
-rw-r--r--chromeos/process_proxy/process_output_watcher_unittest.cc213
-rw-r--r--chromeos/process_proxy/process_proxy_unittest.cc2
4 files changed, 283 insertions, 60 deletions
diff --git a/chromeos/process_proxy/process_output_watcher.cc b/chromeos/process_proxy/process_output_watcher.cc
index f063984..906fe63 100644
--- a/chromeos/process_proxy/process_output_watcher.cc
+++ b/chromeos/process_proxy/process_output_watcher.cc
@@ -4,16 +4,17 @@
#include "chromeos/process_proxy/process_output_watcher.h"
-#include <algorithm>
-#include <cstdio>
-#include <cstring>
-
#include <sys/ioctl.h>
#include <sys/select.h>
#include <unistd.h>
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+
#include "base/logging.h"
#include "base/posix/eintr_wrapper.h"
+#include "base/third_party/icu/icu_utf.h"
namespace {
@@ -32,20 +33,41 @@ void CloseFd(int* fd) {
*fd = -1;
}
+// Gets byte size for a UTF8 character given it's leading byte. The character
+// size is encoded as number of leading '1' bits in the character's leading
+// byte. If the most significant bit is '0', the character is a valid ASCII
+// and it's byte size is 1.
+// The method returns 1 if the provided byte is invalid leading byte.
+size_t UTF8SizeFromLeadingByte(uint8 leading_byte) {
+ size_t byte_count = 0;
+ uint8 mask = 1 << 7;
+ uint8 error_mask = 1 << (7 - CBU8_MAX_LENGTH);
+ while (leading_byte & mask) {
+ if (mask & error_mask)
+ return 1;
+ mask >>= 1;
+ ++byte_count;
+ }
+ return byte_count ? byte_count : 1;
+}
+
} // namespace
namespace chromeos {
-ProcessOutputWatcher::ProcessOutputWatcher(int out_fd, int stop_fd,
+ProcessOutputWatcher::ProcessOutputWatcher(
+ int out_fd,
+ int stop_fd,
const ProcessOutputCallback& callback)
- : out_fd_(out_fd),
+ : read_buffer_size_(0),
+ out_fd_(out_fd),
stop_fd_(stop_fd),
- on_read_callback_(callback) {
+ on_read_callback_(callback) {
VerifyFileDescriptor(out_fd_);
VerifyFileDescriptor(stop_fd_);
max_fd_ = std::max(out_fd_, stop_fd_);
// We want to be sure we will be able to add 0 at the end of the input, so -1.
- read_buffer_size_ = arraysize(read_buffer_) - 1;
+ read_buffer_capacity_ = arraysize(read_buffer_) - 1;
}
void ProcessOutputWatcher::Start() {
@@ -62,7 +84,7 @@ void ProcessOutputWatcher::WatchProcessOutput() {
while (true) {
// This has to be reset with every watch cycle.
fd_set rfds;
- DCHECK(stop_fd_ >= 0);
+ DCHECK_GE(stop_fd_, 0);
InitReadFdSet(out_fd_, stop_fd_, &rfds);
int select_result =
@@ -94,13 +116,16 @@ void ProcessOutputWatcher::ReadFromFd(ProcessOutputType type, int* fd) {
// other streams in case data is written faster than we read it. If there is
// more than read_buffer_size_ bytes in pipe, it will be read in the next
// iteration.
- ssize_t bytes_read = HANDLE_EINTR(read(*fd, read_buffer_, read_buffer_size_));
+ DCHECK_GT(read_buffer_capacity_, read_buffer_size_);
+ ssize_t bytes_read =
+ HANDLE_EINTR(read(*fd,
+ &read_buffer_[read_buffer_size_],
+ read_buffer_capacity_ - read_buffer_size_));
if (bytes_read < 0)
DPLOG(WARNING) << "read from buffer failed";
- if (bytes_read > 0) {
- on_read_callback_.Run(type, std::string(read_buffer_, bytes_read));
- }
+ if (bytes_read > 0)
+ ReportOutput(type, bytes_read);
// If there is nothing on the output the watched process has exited (slave end
// of pty is closed).
@@ -113,6 +138,59 @@ void ProcessOutputWatcher::ReadFromFd(ProcessOutputType type, int* fd) {
}
}
+size_t ProcessOutputWatcher::OutputSizeWithoutIncompleteUTF8() {
+ // Find the last non-trailing character byte. This byte should be used to
+ // infer the last UTF8 character length.
+ int last_lead_byte = read_buffer_size_ - 1;
+ while (true) {
+ // If the series of trailing bytes is too long, something's not right.
+ // Report the whole output, without waiting for further character bytes.
+ if (read_buffer_size_ - last_lead_byte > CBU8_MAX_LENGTH)
+ return read_buffer_size_;
+
+ // If there are trailing characters, there must be a leading one in the
+ // buffer for a valid UTF8 character. Getting past the buffer begining
+ // signals something's wrong, or the buffer is empty. In both cases return
+ // the whole current buffer.
+ if (last_lead_byte < 0)
+ return read_buffer_size_;
+
+ // Found the starting character byte; stop searching.
+ if (!CBU8_IS_TRAIL(read_buffer_[last_lead_byte]))
+ break;
+
+ --last_lead_byte;
+ }
+
+ size_t last_length = UTF8SizeFromLeadingByte(read_buffer_[last_lead_byte]);
+
+ // Note that if |last_length| == 0 or
+ // |last_length| + |last_read_byte| < |read_buffer_size_|, the string is
+ // invalid UTF8. In that case, send the whole read buffer to the observer
+ // immediately, just as if there is no trailing incomplete UTF8 bytes.
+ if (!last_length || last_length + last_lead_byte <= read_buffer_size_)
+ return read_buffer_size_;
+
+ return last_lead_byte;
+}
+
+void ProcessOutputWatcher::ReportOutput(ProcessOutputType type,
+ size_t new_bytes_count) {
+ read_buffer_size_ += new_bytes_count;
+ size_t output_to_report = OutputSizeWithoutIncompleteUTF8();
+
+ on_read_callback_.Run(type, std::string(read_buffer_, output_to_report));
+
+ // Move the bytes that were left behind to the beginning of the buffer and
+ // update the buffer size accordingly.
+ if (output_to_report < read_buffer_size_) {
+ for (size_t i = output_to_report; i < read_buffer_size_; ++i) {
+ read_buffer_[i - output_to_report] = read_buffer_[i];
+ }
+ }
+ read_buffer_size_ -= output_to_report;
+}
+
void ProcessOutputWatcher::OnStop() {
delete this;
}
diff --git a/chromeos/process_proxy/process_output_watcher.h b/chromeos/process_proxy/process_output_watcher.h
index 2a58f8a..598fd5f 100644
--- a/chromeos/process_proxy/process_output_watcher.h
+++ b/chromeos/process_proxy/process_output_watcher.h
@@ -10,12 +10,6 @@
#include "base/callback.h"
#include "chromeos/chromeos_export.h"
-namespace {
-
-const int kReadBufferSize = 256;
-
-} // namespace
-
namespace chromeos {
enum ProcessOutputType {
@@ -27,6 +21,9 @@ enum ProcessOutputType {
typedef base::Callback<void(ProcessOutputType, const std::string&)>
ProcessOutputCallback;
+// Observes output on |out_fd| and invokes |callback| when some output is
+// detected. It assumes UTF8 output.
+// If output is detected in |stop_fd|, the watcher is stopped.
// This class should live on its own thread because running class makes
// underlying thread block. It deletes itself when watching is stopped.
class CHROMEOS_EXPORT ProcessOutputWatcher {
@@ -52,11 +49,22 @@ class CHROMEOS_EXPORT ProcessOutputWatcher {
// Reads data from fd, and when it's done, invokes callback function.
void ReadFromFd(ProcessOutputType type, int* fd);
+ // Checks if the read buffer has any trailing incomplete UTF8 characters and
+ // returns the read buffer size without them.
+ size_t OutputSizeWithoutIncompleteUTF8();
+
+ // Processes new |read_buffer_| state and notifies observer about new process
+ // output.
+ void ReportOutput(ProcessOutputType type, size_t new_bytes_count);
+
// It will just delete this.
void OnStop();
- char read_buffer_[kReadBufferSize];
- ssize_t read_buffer_size_;
+ char read_buffer_[256];
+ // Maximum read buffer content size.
+ size_t read_buffer_capacity_;
+ // Current read bufferi content size.
+ size_t read_buffer_size_;
int out_fd_;
int stop_fd_;
diff --git a/chromeos/process_proxy/process_output_watcher_unittest.cc b/chromeos/process_proxy/process_output_watcher_unittest.cc
index 5eeaf1a..8e4c1665 100644
--- a/chromeos/process_proxy/process_output_watcher_unittest.cc
+++ b/chromeos/process_proxy/process_output_watcher_unittest.cc
@@ -8,40 +8,48 @@
#include <string>
#include <vector>
-#include <sys/wait.h>
-
#include "base/bind.h"
+#include "base/callback.h"
#include "base/file_util.h"
+#include "base/message_loop/message_loop.h"
#include "base/posix/eintr_wrapper.h"
-#include "base/synchronization/waitable_event.h"
+#include "base/run_loop.h"
+#include "base/strings/string_util.h"
#include "base/threading/thread.h"
#include "chromeos/process_proxy/process_output_watcher.h"
namespace chromeos {
struct TestCase {
- std::string str;
+ TestCase(const std::string& input, bool send_terminating_null)
+ : input(input),
+ should_send_terminating_null(send_terminating_null),
+ expected_output(input) {}
+
+ // Conctructor for cases where the output is not expected to be the same as
+ // input.
+ TestCase(const std::string& input,
+ bool send_terminating_null,
+ const std::string& expected_output)
+ : input(input),
+ should_send_terminating_null(send_terminating_null),
+ expected_output(expected_output) {}
+
+ std::string input;
bool should_send_terminating_null;
-
- TestCase(const std::string& expected_string,
- bool send_terminating_null)
- : str(expected_string),
- should_send_terminating_null(send_terminating_null) {
- }
+ std::string expected_output;
};
class ProcessWatcherExpectations {
public:
ProcessWatcherExpectations() {}
- void Init(const std::vector<TestCase>& expectations) {
+ void SetTestCase(const TestCase& test_case) {
received_from_out_ = 0;
- for (size_t i = 0; i < expectations.size(); i++) {
- out_expectations_.append(expectations[i].str);
- if (expectations[i].should_send_terminating_null)
- out_expectations_.append(std::string("", 1));
- }
+ out_expectations_ = test_case.expected_output;
+ if (test_case.should_send_terminating_null)
+ out_expectations_.append(std::string("", 1));
}
bool CheckExpectations(const std::string& data, ProcessOutputType type) {
@@ -49,12 +57,17 @@ class ProcessWatcherExpectations {
if (type != PROCESS_OUTPUT_TYPE_OUT)
return false;
+ if (out_expectations_.length() == 0 && data.length() == 0)
+ return true;
+
EXPECT_LT(received_from_out_, out_expectations_.length());
if (received_from_out_ >= out_expectations_.length())
return false;
EXPECT_EQ(received_from_out_,
out_expectations_.find(data, received_from_out_));
+ if (received_from_out_ != out_expectations_.find(data, received_from_out_))
+ return false;
received_from_out_ += data.length();
return true;
@@ -70,11 +83,19 @@ class ProcessWatcherExpectations {
};
class ProcessOutputWatcherTest : public testing::Test {
-public:
- void StartWatch(int pt, int stop,
- const std::vector<TestCase>& expectations) {
- expectations_.Init(expectations);
+ public:
+ ProcessOutputWatcherTest() : output_watch_thread_started_(false),
+ failed_(false) {
+ }
+
+ virtual ~ProcessOutputWatcherTest() {}
+ virtual void TearDown() OVERRIDE {
+ if (output_watch_thread_started_)
+ output_watch_thread_->Stop();
+ }
+
+ void StartWatch(int pt, int stop) {
// This will delete itself.
ProcessOutputWatcher* crosh_watcher = new ProcessOutputWatcher(pt, stop,
base::Bind(&ProcessOutputWatcherTest::OnRead, base::Unretained(this)));
@@ -82,9 +103,13 @@ public:
}
void OnRead(ProcessOutputType type, const std::string& output) {
- bool success = expectations_.CheckExpectations(output, type);
- if (!success || expectations_.IsDone())
- all_data_received_->Signal();
+ ASSERT_FALSE(failed_);
+ failed_ = !expectations_.CheckExpectations(output, type);
+ if (failed_ || expectations_.IsDone()) {
+ ASSERT_FALSE(test_case_done_callback_.is_null());
+ message_loop_.PostTask(FROM_HERE, test_case_done_callback_);
+ test_case_done_callback_.Reset();
+ }
}
protected:
@@ -96,22 +121,30 @@ public:
}
void RunTest(const std::vector<TestCase>& test_cases) {
- all_data_received_.reset(new base::WaitableEvent(true, false));
-
- base::Thread output_watch_thread("ProcessOutpuWatchThread");
- ASSERT_TRUE(output_watch_thread.Start());
+ ASSERT_FALSE(output_watch_thread_started_);
+ output_watch_thread_.reset(new base::Thread("ProcessOutpuWatchThread"));
+ output_watch_thread_started_ = output_watch_thread_->Start();
+ ASSERT_TRUE(output_watch_thread_started_);
int pt_pipe[2], stop_pipe[2];
ASSERT_FALSE(HANDLE_EINTR(pipe(pt_pipe)));
ASSERT_FALSE(HANDLE_EINTR(pipe(stop_pipe)));
- output_watch_thread.message_loop()->PostTask(FROM_HERE,
+ output_watch_thread_->message_loop()->PostTask(
+ FROM_HERE,
base::Bind(&ProcessOutputWatcherTest::StartWatch,
base::Unretained(this),
- pt_pipe[0], stop_pipe[0], test_cases));
+ pt_pipe[0],
+ stop_pipe[0]));
for (size_t i = 0; i < test_cases.size(); i++) {
- const std::string& test_str = test_cases[i].str;
+ expectations_.SetTestCase(test_cases[i]);
+
+ base::RunLoop run_loop;
+ ASSERT_TRUE(test_case_done_callback_.is_null());
+ test_case_done_callback_ = run_loop.QuitClosure();
+
+ const std::string& test_str = test_cases[i].input;
// Let's make inputs not NULL terminated, unless other is specified in
// the test case.
ssize_t test_size = test_str.length() * sizeof(*test_str.c_str());
@@ -120,22 +153,26 @@ public:
EXPECT_EQ(test_size,
base::WriteFileDescriptor(pt_pipe[1], test_str.c_str(),
test_size));
- }
- all_data_received_->Wait();
+ run_loop.Run();
+ EXPECT_TRUE(expectations_.IsDone());
+ if (failed_)
+ break;
+ }
// Send stop signal. It is not important which string we send.
EXPECT_EQ(1, base::WriteFileDescriptor(stop_pipe[1], "q", 1));
EXPECT_NE(-1, IGNORE_EINTR(close(stop_pipe[1])));
EXPECT_NE(-1, IGNORE_EINTR(close(pt_pipe[1])));
-
- output_watch_thread.Stop();
}
- scoped_ptr<base::WaitableEvent> all_data_received_;
-
private:
+ base::Closure test_case_done_callback_;
+ base::MessageLoop message_loop_;
+ scoped_ptr<base::Thread> output_watch_thread_;
+ bool output_watch_thread_started_;
+ bool failed_;
ProcessWatcherExpectations expectations_;
std::vector<TestCase> exp;
};
@@ -143,6 +180,7 @@ public:
TEST_F(ProcessOutputWatcherTest, OutputWatcher) {
std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("t", false));
test_cases.push_back(TestCase("testing output\n", false));
test_cases.push_back(TestCase("testing error\n", false));
test_cases.push_back(TestCase("testing error1\n", false));
@@ -155,11 +193,112 @@ TEST_F(ProcessOutputWatcherTest, OutputWatcher) {
RunTest(test_cases);
};
+TEST_F(ProcessOutputWatcherTest, SplitUTF8Character) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("test1\xc2", false, "test1"));
+ test_cases.push_back(TestCase("\xb5test1", false, "\xc2\xb5test1"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, SplitSoleUTF8Character) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xc2", false, ""));
+ test_cases.push_back(TestCase("\xb5", false, "\xc2\xb5"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, SplitUTF8CharacterLength3) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("test3\xe2\x82", false, "test3"));
+ test_cases.push_back(TestCase("\xac", false, "\xe2\x82\xac"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, SplitSoleUTF8CharacterThreeWays) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xe2", false, ""));
+ test_cases.push_back(TestCase("\x82", false, ""));
+ test_cases.push_back(TestCase("\xac", false, "\xe2\x82\xac"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, EndsWithThreeByteUTF8Character) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("test\xe2\x82\xac", false, "test\xe2\x82\xac"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, SoleThreeByteUTF8Character) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xe2\x82\xac", false, "\xe2\x82\xac"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, HasThreeByteUTF8Character) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(
+ TestCase("test\xe2\x82\xac_", false, "test\xe2\x82\xac_"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, MulitByteUTF8CharNullTerminated) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("test\xe2\x82\xac", true, "test\xe2\x82\xac"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, MultipleMultiByteUTF8Characters) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(
+ TestCase("test\xe2\x82\xac\xc2", false, "test\xe2\x82\xac"));
+ test_cases.push_back(TestCase("\xb5", false, "\xc2\xb5"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, ContainsInvalidUTF8) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xc2_", false, "\xc2_"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, InvalidUTF8SeriesOfTrailingBytes) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\x82\x82\x82", false, "\x82\x82\x82"));
+ test_cases.push_back(TestCase("\x82\x82\x82", false, "\x82\x82\x82"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, EndsWithInvalidUTF8) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xff", false, "\xff"));
+
+ RunTest(test_cases);
+}
+
+TEST_F(ProcessOutputWatcherTest, FourByteUTF8) {
+ std::vector<TestCase> test_cases;
+ test_cases.push_back(TestCase("\xf0\xa4\xad", false, ""));
+ test_cases.push_back(TestCase("\xa2", false, "\xf0\xa4\xad\xa2"));
+
+ RunTest(test_cases);
+};
+
// Verifies that sending '\0' generates PROCESS_OUTPUT_TYPE_OUT event and does
// not terminate output watcher.
TEST_F(ProcessOutputWatcherTest, SendNull) {
std::vector<TestCase> test_cases;
- // This will send '\0' to output wathcer.
+ // This will send '\0' to output watcher.
test_cases.push_back(TestCase("", true));
// Let's verify that next input also gets detected (i.e. output watcher does
// not exit after seeing '\0' from previous test case).
diff --git a/chromeos/process_proxy/process_proxy_unittest.cc b/chromeos/process_proxy/process_proxy_unittest.cc
index badb7dc..35d9cf2 100644
--- a/chromeos/process_proxy/process_proxy_unittest.cc
+++ b/chromeos/process_proxy/process_proxy_unittest.cc
@@ -5,13 +5,11 @@
#include <gtest/gtest.h>
#include <string>
-#include <sys/wait.h>
#include "base/bind.h"
#include "base/memory/scoped_ptr.h"
#include "base/message_loop/message_loop.h"
#include "base/process/kill.h"
-#include "base/synchronization/waitable_event.h"
#include "base/threading/thread.h"
#include "chromeos/process_proxy/process_proxy_registry.h"