diff options
author | jcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-28 22:07:45 +0000 |
---|---|---|
committer | jcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-28 22:07:45 +0000 |
commit | 68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67 (patch) | |
tree | dcaeccd600b517d48e28432424dd26aca088f3ad /chrome/browser/net | |
parent | 4837913958416b8cd4c0df935cdd8a8538c8d3ae (diff) | |
download | chromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.zip chromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.tar.gz chromium_src-68b8c3e7c6627e8d2e9c46a7c1e44eff9e562b67.tar.bz2 |
Adds a way to encode/decode quoted-printable strings.
This is going to be used for MHTML support.
BUG=30863
TEST=Run the unit-tests.
Review URL: http://codereview.chromium.org/6542069
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@76273 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/browser/net')
-rw-r--r-- | chrome/browser/net/quoted_printable.cc | 132 | ||||
-rw-r--r-- | chrome/browser/net/quoted_printable.h | 35 | ||||
-rw-r--r-- | chrome/browser/net/quoted_printable_unittest.cc | 206 |
3 files changed, 373 insertions, 0 deletions
diff --git a/chrome/browser/net/quoted_printable.cc b/chrome/browser/net/quoted_printable.cc new file mode 100644 index 0000000..b61246b --- /dev/null +++ b/chrome/browser/net/quoted_printable.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/net/quoted_printable.h" + +#include "base/logging.h" +#include "base/string_util.h" + +namespace { + +const int kMaxCharPerLine = 76; +const char* const kEOL = "\r\n"; + +const char kHexTable[] = "0123456789ABCDEF"; + +} // namespace + +namespace chrome { +namespace browser { +namespace net { + +void QuotedPrintableEncode(const std::string& input, std::string* output) { + // The number of characters in the current line. + int char_count = 0; + for (std::string::const_iterator iter = input.begin(); + iter != input.end(); ++iter) { + bool last_char = (iter + 1 == input.end()); + char c = *iter; + // Whether this character can be inserted without encoding. + bool as_is = false; + // All printable ASCII characters can be included as is (but for =). + if (c >= '!' && c <= '~' && c != '=') { + as_is = true; + } + + // Space and tab characters can be included as is if they don't appear at + // the end of a line or at then end of the input. + if (!as_is && (c == '\t' || c == ' ') && !last_char && + !IsEOL(iter + 1, input)) { + as_is = true; + } + + // End of line should be converted to CR-LF sequences. + if (!last_char) { + int eol_len = IsEOL(iter, input); + if (eol_len > 0) { + output->append(kEOL); + char_count = 0; + iter += (eol_len - 1); // -1 because we'll ++ in the for() above. + continue; + } + } + + // Insert a soft line break if necessary. + int min_chars_needed = as_is ? kMaxCharPerLine - 2 : kMaxCharPerLine - 4; + if (!last_char && char_count > min_chars_needed) { + output->append("="); + output->append(kEOL); + char_count = 0; + } + + // Finally, insert the actual character(s). + if (as_is) { + output->append(1, c); + char_count++; + } else { + output->append("="); + output->append(1, kHexTable[static_cast<int>((c >> 4) & 0xF)]); + output->append(1, kHexTable[static_cast<int>(c & 0x0F)]); + char_count += 3; + } + } +} + +bool QuotedPrintableDecode(const std::string& input, std::string* output) { + bool success = true; + for (std::string::const_iterator iter = input.begin(); + iter!= input.end(); ++iter) { + char c = *iter; + if (c != '=') { + output->append(1, c); + continue; + } + if (input.end() - iter < 3) { + LOG(ERROR) << "unfinished = sequence in input string."; + success = false; + output->append(1, c); + continue; + } + char c2 = *(++iter); + char c3 = *(++iter); + if (c2 == '\r' && c3 == '\n') { + // Soft line break, ignored. + continue; + } + + if (!IsHexDigit(c2) || !IsHexDigit(c3)) { + LOG(ERROR) << "invalid = sequence, = followed by non hexa digit " << + "chars: " << c2 << " " << c3; + success = false; + // Just insert the chars as is. + output->append("="); + output->append(1, c2); + output->append(1, c3); + continue; + } + + int i1 = HexDigitToInt(c2); + int i2 = HexDigitToInt(c3); + char r = static_cast<char>(((i1 << 4) & 0xF0) | (i2 & 0x0F)); + output->append(1, r); + } + return success; +} + +int IsEOL(const std::string::const_iterator& iter, const std::string& input) { + if (*iter == '\n') + return 1; // Single LF. + + if (*iter == '\r') { + if ((iter + 1) == input.end() || *(iter + 1) != '\n') + return 1; // Single CR (Commodore and Old Macs). + return 2; // CR-LF. + } + + return 0; +} + +} // namespace net +} // namespace browser +} // namespace chrome diff --git a/chrome/browser/net/quoted_printable.h b/chrome/browser/net/quoted_printable.h new file mode 100644 index 0000000..310a7a7 --- /dev/null +++ b/chrome/browser/net/quoted_printable.h @@ -0,0 +1,35 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_ +#define CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_ +#pragma once + +#include <string> + +// Some functions to encode/decode with the quoted-printable encoding. +// See http://tools.ietf.org/html/rfc2045#section-6.7 + +namespace chrome { +namespace browser { +namespace net { + +// Encodes the input string with the quoted-printable encoding. +void QuotedPrintableEncode(const std::string& input, std::string* output); + +// Decodes the quoted-printable input string. Returns true if the input string +// was wellformed quoted-printable, false otherwise, in which case it still +// decodes as much of the message as possible. +bool QuotedPrintableDecode(const std::string& input, std::string* output); + +// Returns 0 if |iter| does not point to an end-of-line, the number of chars +// that constitutes that EOL otherwise (1 for LF, 2 for CR-LF). +// Exposed as it is also used in unit-tests. +int IsEOL(const std::string::const_iterator& iter, const std::string& input); + +} // namespace net +} // namespace browser +} // namespace chrome + +#endif // CHROME_BROWSER_NET_QUOTED_PRINTABLE_H_ diff --git a/chrome/browser/net/quoted_printable_unittest.cc b/chrome/browser/net/quoted_printable_unittest.cc new file mode 100644 index 0000000..68ba18e --- /dev/null +++ b/chrome/browser/net/quoted_printable_unittest.cc @@ -0,0 +1,206 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/basictypes.h" +#include "chrome/browser/net/quoted_printable.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +class QuotedPrintableTest : public testing::Test { +}; + +const char* const kNormalText[] = { + // Basic sentence with an =. + "If you believe that truth=beauty, then surely mathematics is the most " + "beautiful branch of philosophy.", + + // All ASCII chars. + "\x1\x2\x3\x4\x5\x6\x7\x8\x9\xA\xB\xC\xD\xE\xF" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F" + "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F" + "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F" + "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F" + "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F" + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" + "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" + "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF" + "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF" + "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF" + "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF" + "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF", + + // Space right before max char per line. + "This line has a space at the 75 characters mark = ********************** " + "The end.", + + // Space on max char per line index. + "This line has a space at the 76 characters mark = *********************** " + "The end.", + + // Space at end of line. + "This line ends with a space \r\nThe end.", + + // Space at end of input. + "This input ends with a space. ", + + // Tab right before max char per line. + "This line has a tab at the 75 characters mark = ************************\t" + "The end.", + + // Tab on max char per line index. + "This line has a tab at the 76 characters mark = *************************\t" + "The end.", + + // Tab at end of line. + "This line ends with a tab\t\r\nThe end.", + + // Tab at end of input. + "This input ends with a tab.\t", + + // Various EOLs in input. + "This is a test of having EOLs in the input\r\n" + "Any EOL should be converted \r to \n a CRLF \r\n." +}; + +const char* const kEncodedText[] = { + "If you believe that truth=3Dbeauty, then surely mathematics is the most " + "bea=\r\nutiful branch of philosophy.", + + "=01=02=03=04=05=06=07=08=09\r\n" + "=0B=0C\r\n" + "=0E=0F=10=11=12=13=14=15=16=17=18=19=1A=1B=1C=1D=1E=1F !\"#$%&'()*+,-./01234" + "=\r\n" + "56789:;<=3D>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}" + "=\r\n" + "~=7F=80=81=82=83=84=85=86=87=88=89=8A=8B=8C=8D=8E=8F=90=91=92=93=94=95=96=" + "\r\n" + "=97=98=99=9A=9B=9C=9D=9E=9F=A0=A1=A2=A3=A4=A5=A6=A7=A8=A9=AA=AB=AC=AD=AE=AF=" + "\r\n" + "=B0=B1=B2=B3=B4=B5=B6=B7=B8=B9=BA=BB=BC=BD=BE=BF=C0=C1=C2=C3=C4=C5=C6=C7=C8=" + "\r\n" + "=C9=CA=CB=CC=CD=CE=CF=D0=D1=D2=D3=D4=D5=D6=D7=D8=D9=DA=DB=DC=DD=DE=DF=E0=E1=" + "\r\n" + "=E2=E3=E4=E5=E6=E7=E8=E9=EA=EB=EC=ED=EE=EF=F0=F1=F2=F3=F4=F5=F6=F7=F8=F9=FA=" + "\r\n" + "=FB=FC=FD=FE=FF", + + "This line has a space at the 75 characters mark =3D ********************** =" + "\r\nThe end.", + + "This line has a space at the 76 characters mark =3D ***********************=" + "\r\n The end.", + + "This line ends with a space=20\r\nThe end.", + + "This input ends with a space.=20", + + "This line has a tab at the 75 characters mark =3D ************************\t" + "=\r\nThe end.", + + "This line has a tab at the 76 characters mark =3D *************************=" + "\r\n\tThe end.", + + "This line ends with a tab=09\r\nThe end.", + + "This input ends with a tab.=09", + + "This is a test of having EOLs in the input\r\n" + "Any EOL should be converted=20\r\n to=20\r\n a CRLF=20\r\n." +}; + + +const char* const kBadEncodedText[] = { + // Invalid finish with =. + "A =3D at the end of the input is bad=", + + // Invalid = sequence. + "This line contains a valid =3D sequence and invalid ones =$$ = =\t =1 = 2 " + "==", +}; + +const char* const kBadEncodedTextDecoded[] = { + "A = at the end of the input is bad=", + + "This line contains a valid = sequence and invalid ones =$$ = =\t =1 = 2 ==", +}; + +// Compares the 2 strings and returns true if they are identical, but for EOLs +// that don't have to be the same (ex: \r\n can match \n). +bool CompareEOLInsensitive(const std::string& s1, const std::string& s2) { + std::string::const_iterator s1_iter = s1.begin(); + std::string::const_iterator s2_iter = s2.begin(); + + while (true) { + if (s1_iter == s1.end() && s2_iter == s2.end()) + return true; + if ((s1_iter == s1.end() && s2_iter != s2.end()) || + (s1_iter != s1.end() && s2_iter == s2.end())) { + return false; + } + int s1_eol = chrome::browser::net::IsEOL(s1_iter, s1); + int s2_eol = chrome::browser::net::IsEOL(s2_iter, s2); + if ((!s1_eol && s2_eol) || (s1_eol && !s2_eol)) { + // Unmatched EOL. + return false; + } + if (s1_eol > 0) { + s1_iter += s1_eol; + s2_iter += s2_eol; + } else { + // Non-EOL char. + if (*s1_iter != *s2_iter) + return false; + s1_iter++; + s2_iter++; + } + } + return true; +} + +} // namespace + +TEST(QuotedPrintableTest, Encode) { + ASSERT_EQ(arraysize(kNormalText), arraysize(kEncodedText)); + for (size_t i = 0; i < arraysize(kNormalText); ++i) { + SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i); + std::string output; + chrome::browser::net::QuotedPrintableEncode(kNormalText[i], &output); + std::string expected(kEncodedText[i]); + EXPECT_EQ(expected, output); + } +} + +TEST(QuotedPrintableTest, Decode) { + ASSERT_EQ(arraysize(kNormalText), arraysize(kEncodedText)); + for (size_t i = 0; i < arraysize(kNormalText); ++i) { + std::string output; + EXPECT_TRUE(chrome::browser::net::QuotedPrintableDecode( + kEncodedText[i], &output)); + std::string expected(kNormalText[i]); + SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i << + "\n Actual=\n" << output << "\n Expected=\n" << + expected); + // We cannot test for equality as EOLs won't match the normal text + // (as any EOL is converted to a CRLF during encoding). + EXPECT_TRUE(CompareEOLInsensitive(expected, output)); + } +} + +// Tests that we return false but still do our best to decode badly encoded +// inputs. +TEST(QuotedPrintableTest, DecodeBadInput) { + ASSERT_EQ(arraysize(kBadEncodedText), arraysize(kBadEncodedTextDecoded)); + for (size_t i = 0; i < arraysize(kBadEncodedText); ++i) { + SCOPED_TRACE(::testing::Message::Message() << "Iteration " << i); + std::string output; + EXPECT_FALSE(chrome::browser::net::QuotedPrintableDecode( + kBadEncodedText[i], &output)); + std::string expected(kBadEncodedTextDecoded[i]); + EXPECT_EQ(expected, output); + } +} |