summaryrefslogtreecommitdiffstats
path: root/chrome/browser/net
diff options
context:
space:
mode:
authorjcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-28 22:07:45 +0000
committerjcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-02-28 22:07:45 +0000
commit68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67 (patch)
treedcaeccd600b517d48e28432424dd26aca088f3ad /chrome/browser/net
parent4837913958416b8cd4c0df935cdd8a8538c8d3ae (diff)
downloadchromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.zip
chromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.tar.gz
chromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.tar.bz2
Adds a way to encode/decode quoted-printable strings.
This is going to be used for MHTML support. BUG=30863 TEST=Run the unit-tests. Review URL: http://codereview.chromium.org/6542069 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76273 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/net')
-rw-r--r--chrome/browser/net/quoted_printable.cc132
-rw-r--r--chrome/browser/net/quoted_printable.h35
-rw-r--r--chrome/browser/net/quoted_printable_unittest.cc206
3 files changed, 373 insertions, 0 deletions
diff --git a/chrome/browser/net/quoted_printable.cc b/chrome/browser/net/quoted_printable.cc
new file mode 100644
index 0000000..b61246b
--- /dev/null
+++ b/chrome/browser/net/quoted_printable.cc
@@ -0,0 +1,132 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/net/quoted_printable.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
namespace {

// Longest line the encoder may produce, excluding the CR-LF terminator but
// including the '=' of a soft line break.
const int kMaxCharPerLine = 76;

// Line terminator written by the encoder.
const char kEOL[] = "\r\n";

// Upper-case hexadecimal digits, indexed by nibble value (0-15).
const char kHexTable[] = "0123456789ABCDEF";

}  // namespace
+
+namespace chrome {
+namespace browser {
+namespace net {
+
+void QuotedPrintableEncode(const std::string& input, std::string* output) {
+ // The number of characters in the current line.
+ int char_count = 0;
+ for (std::string::const_iterator iter = input.begin();
+ iter != input.end(); ++iter) {
+ bool last_char = (iter + 1 == input.end());
+ char c = *iter;
+ // Whether this character can be inserted without encoding.
+ bool as_is = false;
+ // All printable ASCII characters can be included as is (but for =).
+ if (c >= '!' && c <= '~' && c != '=') {
+ as_is = true;
+ }
+
+ // Space and tab characters can be included as is if they don't appear at
+ // the end of a line or at then end of the input.
+ if (!as_is && (c == '\t' || c == ' ') && !last_char &&
+ !IsEOL(iter + 1, input)) {
+ as_is = true;
+ }
+
+ // End of line should be converted to CR-LF sequences.
+ if (!last_char) {
+ int eol_len = IsEOL(iter, input);
+ if (eol_len > 0) {
+ output->append(kEOL);
+ char_count = 0;
+ iter += (eol_len - 1); // -1 because we'll ++ in the for() above.
+ continue;
+ }
+ }
+
+ // Insert a soft line break if necessary.
+ int min_chars_needed = as_is ? kMaxCharPerLine - 2 : kMaxCharPerLine - 4;
+ if (!last_char && char_count > min_chars_needed) {
+ output->append("=");
+ output->append(kEOL);
+ char_count = 0;
+ }
+
+ // Finally, insert the actual character(s).
+ if (as_is) {
+ output->append(1, c);
+ char_count++;
+ } else {
+ output->append("=");
+ output->append(1, kHexTable[static_cast<int>((c >> 4) & 0xF)]);
+ output->append(1, kHexTable[static_cast<int>(c & 0x0F)]);
+ char_count += 3;
+ }
+ }
+}
+
+bool QuotedPrintableDecode(const std::string& input, std::string* output) {
+ bool success = true;
+ for (std::string::const_iterator iter = input.begin();
+ iter!= input.end(); ++iter) {
+ char c = *iter;
+ if (c != '=') {
+ output->append(1, c);
+ continue;
+ }
+ if (input.end() - iter < 3) {
+ LOG(ERROR) << "unfinished = sequence in input string.";
+ success = false;
+ output->append(1, c);
+ continue;
+ }
+ char c2 = *(++iter);
+ char c3 = *(++iter);
+ if (c2 == '\r' && c3 == '\n') {
+ // Soft line break, ignored.
+ continue;
+ }
+
+ if (!IsHexDigit(c2) || !IsHexDigit(c3)) {
+ LOG(ERROR) << "invalid = sequence, = followed by non hexa digit " <<
+ "chars: " << c2 << " " << c3;
+ success = false;
+ // Just insert the chars as is.
+ output->append("=");
+ output->append(1, c2);
+ output->append(1, c3);
+ continue;
+ }
+
+ int i1 = HexDigitToInt(c2);
+ int i2 = HexDigitToInt(c3);
+ char r = static_cast<char>(((i1 << 4) & 0xF0) | (i2 & 0x0F));
+ output->append(1, r);
+ }
+ return success;
+}
+
// Returns the length of the end-of-line sequence that starts at |iter| in
// |input|: 2 for CR-LF, 1 for a lone LF or a lone CR, and 0 when |iter| does
// not point at an end of line. |iter| must be an iterator into |input|; the
// past-the-end iterator is accepted and yields 0.
int IsEOL(const std::string::const_iterator& iter, const std::string& input) {
  // Never dereference the past-the-end iterator.
  if (iter == input.end())
    return 0;

  if (*iter == '\n')
    return 1;  // Single LF.

  if (*iter == '\r') {
    if ((iter + 1) == input.end() || *(iter + 1) != '\n')
      return 1;  // Single CR (Commodore and Old Macs).
    return 2;  // CR-LF.
  }

  return 0;
}
+
+} // namespace net
+} // namespace browser
+} // namespace chrome
diff --git a/chrome/browser/net/quoted_printable.h b/chrome/browser/net/quoted_printable.h
new file mode 100644
index 0000000..310a7a7
--- /dev/null
+++ b/chrome/browser/net/quoted_printable.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_
+#define CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_
+#pragma once
+
+#include <string>
+
+// Some functions to encode/decode with the quoted-printable encoding.
+// See http://tools.ietf.org/html/rfc2045#section-6.7
+
+namespace chrome {
+namespace browser {
+namespace net {
+
+// Encodes the input string with the quoted-printable encoding.
+void QuotedPrintableEncode(const std::string& input, std::string* output);
+
+// Decodes the quoted-printable input string and appends the result to
+// |output|. Returns true if the input string was well-formed quoted-printable,
+// false otherwise, in which case it still decodes as much of the message as
+// possible.
+bool QuotedPrintableDecode(const std::string& input, std::string* output);
+
+// Returns 0 if |iter| does not point to an end-of-line, otherwise the number
+// of chars that constitute that EOL (1 for a lone LF or a lone CR, 2 for
+// CR-LF).
+// Exposed as it is also used in unit-tests.
+int IsEOL(const std::string::const_iterator& iter, const std::string& input);
+
+} // namespace net
+} // namespace browser
+} // namespace chrome
+
+#endif // CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_
diff --git a/chrome/browser/net/quoted_printable_unittest.cc b/chrome/browser/net/quoted_printable_unittest.cc
new file mode 100644
index 0000000..68ba18e
--- /dev/null
+++ b/chrome/browser/net/quoted_printable_unittest.cc
@@ -0,0 +1,206 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "chrome/browser/net/quoted_printable.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// Fixture type for the quoted-printable tests.
+// NOTE(review): the tests in this file are declared with TEST() rather than
+// TEST_F(), so this fixture does not appear to be instantiated - confirm.
+class QuotedPrintableTest : public testing::Test {
+};
+
+// Plain-text samples. Entry i is the input of the Encode test and the expected
+// (EOL-insensitive) output of the Decode test for kEncodedText[i].
+const char* const kNormalText[] = {
+ // Basic sentence with an =.
+ "If you believe that truth=beauty, then surely mathematics is the most "
+ "beautiful branch of philosophy.",
+
+ // Every byte value from 0x01 through 0xFF, in order.
+ "\x1\x2\x3\x4\x5\x6\x7\x8\x9\xA\xB\xC\xD\xE\xF"
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+ "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
+ "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
+ "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
+ "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
+ "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
+ "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
+ "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
+ "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
+ "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
+ "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
+ "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
+ "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF",
+
+ // Space right before max char per line.
+ "This line has a space at the 75 characters mark = ********************** "
+ "The end.",
+
+ // Space on max char per line index.
+ "This line has a space at the 76 characters mark = *********************** "
+ "The end.",
+
+ // Space at end of line.
+ "This line ends with a space \r\nThe end.",
+
+ // Space at end of input.
+ "This input ends with a space. ",
+
+ // Tab right before max char per line.
+ "This line has a tab at the 75 characters mark = ************************\t"
+ "The end.",
+
+ // Tab on max char per line index.
+ "This line has a tab at the 76 characters mark = *************************\t"
+ "The end.",
+
+ // Tab at end of line.
+ "This line ends with a tab\t\r\nThe end.",
+
+ // Tab at end of input.
+ "This input ends with a tab.\t",
+
+ // Various EOLs in input.
+ "This is a test of having EOLs in the input\r\n"
+ "Any EOL should be converted \r to \n a CRLF \r\n."
+};
+
+// Quoted-printable form of each kNormalText entry, in the same order. Entry i
+// is the expected output of the Encode test and the input of the Decode test.
+const char* const kEncodedText[] = {
+ // Basic sentence: the = is escaped and one soft line break is inserted.
+ "If you believe that truth=3Dbeauty, then surely mathematics is the most "
+ "bea=\r\nutiful branch of philosophy.",
+
+ // All byte values: the LF (0x0A) and CR (0x0D) come out as CR-LF.
+ "=01=02=03=04=05=06=07=08=09\r\n"
+ "=0B=0C\r\n"
+ "=0E=0F=10=11=12=13=14=15=16=17=18=19=1A=1B=1C=1D=1E=1F !\"#$%&'()*+,-./01234"
+ "=\r\n"
+ "56789:;<=3D>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}"
+ "=\r\n"
+ "~=7F=80=81=82=83=84=85=86=87=88=89=8A=8B=8C=8D=8E=8F=90=91=92=93=94=95=96="
+ "\r\n"
+ "=97=98=99=9A=9B=9C=9D=9E=9F=A0=A1=A2=A3=A4=A5=A6=A7=A8=A9=AA=AB=AC=AD=AE=AF="
+ "\r\n"
+ "=B0=B1=B2=B3=B4=B5=B6=B7=B8=B9=BA=BB=BC=BD=BE=BF=C0=C1=C2=C3=C4=C5=C6=C7=C8="
+ "\r\n"
+ "=C9=CA=CB=CC=CD=CE=CF=D0=D1=D2=D3=D4=D5=D6=D7=D8=D9=DA=DB=DC=DD=DE=DF=E0=E1="
+ "\r\n"
+ "=E2=E3=E4=E5=E6=E7=E8=E9=EA=EB=EC=ED=EE=EF=F0=F1=F2=F3=F4=F5=F6=F7=F8=F9=FA="
+ "\r\n"
+ "=FB=FC=FD=FE=FF",
+
+ // Space right before max char per line: kept, then a soft line break.
+ "This line has a space at the 75 characters mark =3D ********************** ="
+ "\r\nThe end.",
+
+ // Space on max char per line index: moved after the soft line break.
+ "This line has a space at the 76 characters mark =3D ***********************="
+ "\r\n The end.",
+
+ // Space at end of line: escaped as =20.
+ "This line ends with a space=20\r\nThe end.",
+
+ // Space at end of input: escaped as =20.
+ "This input ends with a space.=20",
+
+ // Tab right before max char per line: kept, then a soft line break.
+ "This line has a tab at the 75 characters mark =3D ************************\t"
+ "=\r\nThe end.",
+
+ // Tab on max char per line index: moved after the soft line break.
+ "This line has a tab at the 76 characters mark =3D *************************="
+ "\r\n\tThe end.",
+
+ // Tab at end of line: escaped as =09.
+ "This line ends with a tab=09\r\nThe end.",
+
+ // Tab at end of input: escaped as =09.
+ "This input ends with a tab.=09",
+
+ // Various EOLs in input: CR, LF and CR-LF all become CR-LF.
+ "This is a test of having EOLs in the input\r\n"
+ "Any EOL should be converted=20\r\n to=20\r\n a CRLF=20\r\n."
+};
+
+
+// Malformed quoted-printable inputs for the DecodeBadInput test. Decoding each
+// entry is expected to return false and to produce the entry of
+// kBadEncodedTextDecoded at the same index.
+const char* const kBadEncodedText[] = {
+ // Invalid finish with =.
+ "A =3D at the end of the input is bad=",
+
+ // Invalid = sequence.
+ "This line contains a valid =3D sequence and invalid ones =$$ = =\t =1 = 2 "
+ "==",
+};
+
+// Expected best-effort decoding of each kBadEncodedText entry, in the same
+// order: valid =3D sequences are decoded, malformed ones are left untouched.
+const char* const kBadEncodedTextDecoded[] = {
+ "A = at the end of the input is bad=",
+
+ "This line contains a valid = sequence and invalid ones =$$ = =\t =1 = 2 ==",
+};
+
+// Compares the 2 strings and returns true if they are identical, but for EOLs
+// that don't have to be the same (ex: \r\n can match \n).
+bool CompareEOLInsensitive(const std::string& s1, const std::string& s2) {
+ std::string::const_iterator s1_iter = s1.begin();
+ std::string::const_iterator s2_iter = s2.begin();
+
+ while (true) {
+ if (s1_iter == s1.end() && s2_iter == s2.end())
+ return true;
+ if ((s1_iter == s1.end() && s2_iter != s2.end()) ||
+ (s1_iter != s1.end() && s2_iter == s2.end())) {
+ return false;
+ }
+ int s1_eol = chrome::browser::net::IsEOL(s1_iter, s1);
+ int s2_eol = chrome::browser::net::IsEOL(s2_iter, s2);
+ if ((!s1_eol && s2_eol) || (s1_eol && !s2_eol)) {
+ // Unmatched EOL.
+ return false;
+ }
+ if (s1_eol > 0) {
+ s1_iter += s1_eol;
+ s2_iter += s2_eol;
+ } else {
+ // Non-EOL char.
+ if (*s1_iter != *s2_iter)
+ return false;
+ s1_iter++;
+ s2_iter++;
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+TEST(QuotedPrintableTest, Encode) {
+ ASSERT_EQ(arraysize(kNormalText), arraysize(kEncodedText));
+ for (size_t i = 0; i < arraysize(kNormalText); ++i) {
+ SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i);
+ std::string output;
+ chrome::browser::net::QuotedPrintableEncode(kNormalText[i], &output);
+ std::string expected(kEncodedText[i]);
+ EXPECT_EQ(expected, output);
+ }
+}
+
+TEST(QuotedPrintableTest, Decode) {
+ ASSERT_EQ(arraysize(kNormalText), arraysize(kEncodedText));
+ for (size_t i = 0; i < arraysize(kNormalText); ++i) {
+ std::string output;
+ EXPECT_TRUE(chrome::browser::net::QuotedPrintableDecode(
+ kEncodedText[i], &output));
+ std::string expected(kNormalText[i]);
+ SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i <<
+ "\n Actual=\n" << output << "\n Expected=\n" <<
+ expected);
+ // We cannot test for equality as EOLs won't match the normal text
+ // (as any EOL is converted to a CRLF during encoding).
+ EXPECT_TRUE(CompareEOLInsensitive(expected, output));
+ }
+}
+
+// Tests that we return false but still do our best to decode badly encoded
+// inputs.
+TEST(QuotedPrintableTest, DecodeBadInput) {
+ ASSERT_EQ(arraysize(kBadEncodedText), arraysize(kBadEncodedTextDecoded));
+ for (size_t i = 0; i < arraysize(kBadEncodedText); ++i) {
+ SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i);
+ std::string output;
+ EXPECT_FALSE(chrome::browser::net::QuotedPrintableDecode(
+ kBadEncodedText[i], &output));
+ std::string expected(kBadEncodedTextDecoded[i]);
+ EXPECT_EQ(expected, output);
+ }
+}