Refactor tar code to support long names.

The previous code only supported names up to 99 chars. This one supports up to 1024. The code actually supports 32-bit lengths, but given that Linux, OSX and others only support 255, I set it to 1024. Why 1024? Because Windows actually supports 255 wchar characters, which when converted to UTF-8 could be up to 1024 bytes. I'm not 100% sure the format is correct. I could not find any docs on the format; I just reverse-engineered it. Looking at hex dumps produced by both 7zip and GNU tar, it was pretty clear what it does, including zeroing many of the standard fields. Review URL: http://codereview.chromium.org/159129 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@21210 0039d316-1c4b-4281-b951-d872f2087c98
author: gman@google.com <gman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-21 20:41:15 +0000
committer: gman@google.com <gman@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-21 20:41:15 +0000
commit: 41052c98c01f6575b436a478b49612722478db70 (patch)
tree: b69490176459e84859bfc4780113b6861c69b529 /o3d/import
parent: d1868637ceb9def78abc10b10c55f1642c6c6971 (diff)
download: chromium_src-41052c98c01f6575b436a478b49612722478db70.zip
chromium_src-41052c98c01f6575b436a478b49612722478db70.tar.gz
chromium_src-41052c98c01f6575b436a478b49612722478db70.tar.bz2
6 files changed, 189 insertions, 65 deletions
diff --git a/o3d/import/cross/tar_generator.cc b/o3d/import/cross/tar_generator.cc
index f59306d..0b71e69 100644
--- a/o3d/import/cross/tar_generator.cc
+++ b/o3d/import/cross/tar_generator.cc
@@ -41,7 +41,7 @@ using std::string;
 
 namespace o3d {
 
-const int kMaxFilenameSize        = 100;
+const int kMaxFilenameSizeOldFormat = 100;
 
 const int kFileNameOffset         = 0;
 const int kFileModeOffset         = 100;
@@ -54,36 +54,48 @@ const int kLinkFlagOffset         = 156;
 const int kMagicOffset            = 257;
 const int kUserNameOffset         = 265;
 const int kGroupNameOffset        = 297;
+// This is the name GNU Tar puts in a header block if the block is really
+// a long filename block.
+static const char* kLongLink = "././@LongLink";
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-void TarGenerator::AddFile(const String &file_name, size_t file_size) {
-  AddDirectoryEntryIfNeeded(file_name);
-  AddEntry(file_name, file_size, false);
+bool TarGenerator::AddFile(const String &file_name, size_t file_size) {
+  if (!AddDirectoryEntryIfNeeded(file_name)) {
+    return false;
+  }
+  return AddEntry(file_name, file_size, false);
 }
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-void TarGenerator::AddDirectory(const String &file_name) {
-  AddEntry(file_name, 0, true);
+bool TarGenerator::AddDirectory(const String &file_name) {
+  return AddEntry(file_name, 0, true);
 }
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 // We keep a map so we add a particular directory entry only once
-void TarGenerator::AddDirectoryEntryIfNeeded(const String &file_name) {
+bool TarGenerator::AddDirectoryEntryIfNeeded(const String &file_name) {
   string::size_type index = file_name.find_last_of('/');
 
   if (index != string::npos) {
     String dir_name = file_name.substr(0, index + 1);  // keep the '/' at end
     if (!directory_map_[dir_name]) {
       directory_map_[dir_name] = true;
-      AddDirectory(dir_name);
+      return AddDirectory(dir_name);
     }
   }
+  return true;
 }
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-void TarGenerator::AddEntry(const String &file_name,
-                            size_t file_size,
-                            bool is_directory) {
+
+
+void TarGenerator::WriteHeader(const String& file_name,
+                               size_t file_size,
+                               char type,
+                               int mode,
+                               int user_id,
+                               int group_id,
+                               int mod_time) {
   // first write out last data block from last file (if any)
   FlushDataBuffer(true);
 
@@ -95,32 +107,31 @@ void TarGenerator::AddEntry(const String &file_name,
   char *p = reinterpret_cast<char*>(h);
 
   // File name
-  strncpy(p, file_name.c_str(), kMaxFilenameSize - 1);
+  strncpy(p, file_name.c_str(), kMaxFilenameSizeOldFormat - 1);
 
   // File mode
-  ::snprintf(p + kFileModeOffset, 8, "%07o", is_directory ? 0755 : 0644);
+  ::snprintf(p + kFileModeOffset, 8, "%07o", mode);
 
   // UserID
-  ::snprintf(p + kUserIDOffset, 8, "%07o", 0765);
+  ::snprintf(p + kUserIDOffset, 8, "%07o", user_id);
 
   // GroupID
-  ::snprintf(p + kGroupIDOffset, 8, "%07o", 0204);
+  ::snprintf(p + kGroupIDOffset, 8, "%07o", group_id);
 
   // File size
   ::snprintf(p + kFileSizeOffset, 12, "%011o", file_size);
 
   // Modification time
   // TODO: write the correct current time here...
-  ::snprintf(p + kModifyTimeOffset, 12, "%07o", 011131753141);
+  ::snprintf(p + kModifyTimeOffset, 12, "%07o", mod_time);
 
   // Initialize Header checksum so check sum can be computed
   // by ComputeCheckSum() which will fill in the value here
   ::memset(p + kHeaderCheckSumOffset, 32, 8);
 
-  // We only support ordinary files and directories, which is fine
-  // for our use case
-  int link_flag = is_directory ? '5' : '0';
-  p[kLinkFlagOffset] = link_flag;
+  // We only support ordinary files, directories and long filename blocks,
+  // which is fine for our use case
+  p[kLinkFlagOffset] = type;
 
   // Magic offset
   ::snprintf(p + kMagicOffset, 8, "ustar  ");
@@ -131,7 +142,6 @@ void TarGenerator::AddEntry(const String &file_name,
   // Group name
   ::snprintf(p + kGroupNameOffset, 32, "staff");
 
-
   // This has to be done at the end
   ComputeCheckSum(header);
 
@@ -141,6 +151,37 @@ void TarGenerator::AddEntry(const String &file_name,
   }
 }
 
+
+bool TarGenerator::AddEntry(const String &file_name,
+                            size_t file_size,
+                            bool is_directory) {
+  // If the filename is longer than 99 chars, use the GNU format to write out
+  // a longer filename.
+  if (file_name.size() >= kMaxFilenameSizeOldFormat) {
+    WriteHeader(kLongLink,
+                file_name.size(),
+                'L',
+                0,
+                0,
+                0,
+                0);
+
+    MemoryReadStream stream(
+        reinterpret_cast<const uint8*>(file_name.c_str()), file_name.size());
+    AddFileBytes(&stream, file_name.size());
+  }
+
+  WriteHeader(file_name,
+              file_size,
+              is_directory ? '5' : '0',
+              is_directory ? 0755 : 0644,
+              0765,
+              0204,
+              011131753141);
+
+  return true;
+}
+
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 void TarGenerator::ComputeCheckSum(uint8 *header) {
   unsigned int checksum = 0;
diff --git a/o3d/import/cross/tar_generator.h b/o3d/import/cross/tar_generator.h
index 1468849..21bc733 100644
--- a/o3d/import/cross/tar_generator.h
+++ b/o3d/import/cross/tar_generator.h
@@ -66,9 +66,8 @@ class TarGenerator {
   virtual ~TarGenerator() { Finalize(); }
 
   // Call AddFile() for each file entry, followed by calls to AddFileBytes()
-  // for the file's data
-  virtual void AddFile(const String &file_name,
-                       size_t file_size);
+  // for the file's data. Returns true on success.
+  virtual bool AddFile(const String &file_name, size_t file_size);
 
   // Call to "push" bytes to be processed - our client will get called back
   // with the byte stream, with files rounded up to the nearest block size
@@ -79,16 +78,29 @@ class TarGenerator {
   virtual void Finalize();
 
  private:
-  void AddEntry(const String &file_name,
+  // Returns true on success.
+  bool AddEntry(const String &file_name,
                 size_t file_size,
                 bool is_directory);
 
-  void AddDirectory(const String &file_name);
-  void AddDirectoryEntryIfNeeded(const String &file_name);
+  // Returns true on success.
+  bool AddDirectory(const String &file_name);
+
+  // Returns true on success.
+  bool AddDirectoryEntryIfNeeded(const String &file_name);
 
   // Checksum for each header
   void ComputeCheckSum(uint8 *header);
 
+  // Writes a header block.
+  void WriteHeader(const String& filename,
+                   size_t file_size,
+                   char type,
+                   int mode,
+                   int user_id,
+                   int group_id,
+                   int mod_time);
+
   // flushes buffered file data to the client callback
   // if |flush_padding_zeroes| is |true| then flush a complete block
   // with zero padding even if less was buffered
diff --git a/o3d/import/cross/tar_generator_test.cc b/o3d/import/cross/tar_generator_test.cc
index 0e45b8d..9403d62 100644
--- a/o3d/import/cross/tar_generator_test.cc
+++ b/o3d/import/cross/tar_generator_test.cc
@@ -63,15 +63,19 @@ const int kMagicOffset            = 257;
 const int kUserNameOffset         = 265;
 const int kGroupNameOffset        = 297;
 
+const char *kLongLink = "././@LongLink";
 const char *kDirName1 = "test/apples/";
 const char *kDirName2 = "test/oranges/";
 const char *kFileName1 = "test/apples/file1";
 const char *kFileName2 = "test/apples/file2";
 const char *kFileName3 = "test/oranges/file3";
+const char *kFileName4 =
+    "ThisIsAFilenameLongerThen100CharsThisIsAFilenameLongerThen100Chars"
+    "ThisIsAFilenameLongerThen100CharsThisIsAFilenameLongerThen100Chars";
 
 // The first file is less than one block in size
 const char *kFileContents1 =
-    "The cellphone is the world’s most ubiquitous computer.\n"
+    "The cellphone is the world most ubiquitous computer.\n"
     "The four billion cellphones in use around the globe carry personal\n"
     "information, provide access to the Web and are being used more and more\n"
     "to navigate the real world. And as cellphones change how we live,\n"
@@ -87,11 +91,11 @@ const char *kFileContents2 =
     "levels since the credit crisis erupted. Financial shares were battered.\n"
     "And rattled investors clamored to buy rainy-day investments like gold\n"
     "and Treasury debt. It was a global wave of selling spurred by rising\n"
-    "worries about how banks, automakers — entire countries — would fare\n"
+    "worries about how banks, automakers entire countries would fare\n"
     "in a deepening global downturn.\n"
-    "'Nobody believes it’s going get better yet,' said Howard Silverblatt,\n"
-    "senior index analyst at Standard & Poor’s. 'Do you see that light at\n"
-    "the end of the tunnel? Any kind of light? Right now, it’s not there'\n"
+    "'Nobody believes it&'s going get better yet,' said Howard Silverblatt,\n"
+    "senior index analyst at Standard & Poors. 'Do you see that light at\n"
+    "the end of the tunnel? Any kind of light? Right now, it's not there'\n"
     "yet.\n";
 
 // The 3rd file takes one block
@@ -117,6 +121,10 @@ class CallbackClient : public StreamProcessor {
     VALIDATE_DIRECTORY_HEADER2,  // 3rd file is in another directory
     VALIDATE_FILE_HEADER3,
     VALIDATE_FILE_DATA3,
+    VALIDATE_FILE_LONGNAME_HEADER4,  // 4th file has a long name.
+    VALIDATE_FILE_LONGNAME_DATA4,
+    VALIDATE_FILE_HEADER4,
+    VALIDATE_FILE_DATA4,
     FINISHED
   };
 
@@ -213,6 +221,25 @@ int CallbackClient::ProcessBytes(MemoryReadStream *stream,
           ValidateData(memory_block_, kFileContents3);
           break;
 
+        case VALIDATE_FILE_LONGNAME_HEADER4:
+          ValidateHeader(memory_block_, kLongLink, strlen(kFileName4));
+          break;
+
+        case VALIDATE_FILE_LONGNAME_DATA4:
+          ValidateData(memory_block_, kFileName4);
+          break;
+
+        case VALIDATE_FILE_HEADER4: {
+          String first_99_chars(kFileName4, 99);
+          ValidateHeader(memory_block_, first_99_chars.c_str(),
+                         strlen(kFileContents3));
+          break;
+        }
+
+        case VALIDATE_FILE_DATA4:
+          ValidateData(memory_block_, kFileContents3);
+          break;
+
         case FINISHED:
           break;
       }
@@ -316,7 +343,7 @@ void CallbackClient::ValidateHeader(uint8 *header,
 
   // For now we only have directories '5' or normal files '0'
   int link_flag = header[kLinkFlagOffset];
-  EXPECT_TRUE(link_flag == '0' || link_flag == '5');
+  EXPECT_TRUE(link_flag == '0' || link_flag == '5' || link_flag == 'L');
 
   EXPECT_EQ(0, strcmp((const char*)header + kMagicOffset, "ustar  "));
 
@@ -357,20 +384,24 @@ TEST_F(TarGeneratorTest, CreateSimpleArchive) {
   const int kFileLength2 = strlen(kFileContents2);
   const int kFileLength3 = strlen(kFileContents3);
 
-  generator.AddFile(kFileName1, kFileLength1);
+  EXPECT_TRUE(generator.AddFile(kFileName1, kFileLength1));
   MemoryReadStream file1_stream(reinterpret_cast<const uint8*>(kFileContents1),
                                 kFileLength1);
   generator.AddFileBytes(&file1_stream, kFileLength1);
 
-  generator.AddFile(kFileName2, kFileLength2);
+  EXPECT_TRUE(generator.AddFile(kFileName2, kFileLength2));
   MemoryReadStream file2_stream(reinterpret_cast<const uint8*>(kFileContents2),
                                 kFileLength2);
   generator.AddFileBytes(&file2_stream, kFileLength2);
 
-  generator.AddFile(kFileName3, kFileLength3);
+  EXPECT_TRUE(generator.AddFile(kFileName3, kFileLength3));
   MemoryReadStream file3_stream(reinterpret_cast<const uint8*>(kFileContents3),
                                 kFileLength3);
   generator.AddFileBytes(&file3_stream, kFileLength3);
+  EXPECT_TRUE(generator.AddFile(kFileName4, kFileLength3));
+  MemoryReadStream file4_stream(reinterpret_cast<const uint8*>(kFileContents3),
+                                kFileLength3);
+  generator.AddFileBytes(&file4_stream, kFileLength3);
 
   generator.Finalize();
 
diff --git a/o3d/import/cross/tar_processor.cc b/o3d/import/cross/tar_processor.cc
index bb1d9b8..d126788 100644
--- a/o3d/import/cross/tar_processor.cc
+++ b/o3d/import/cross/tar_processor.cc
@@ -37,6 +37,9 @@
 
 namespace o3d {
 
+static const int kFileSizeOffset         = 124;
+static const int kLinkFlagOffset         = 156;
+
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 int TarProcessor::ProcessBytes(MemoryReadStream *stream, size_t n) {
   // Keep processing the byte-stream until we've consumed all we're given
@@ -60,26 +63,40 @@ int TarProcessor::ProcessBytes(MemoryReadStream *stream, size_t n) {
       bytes_to_consume -= bytes_to_read;
 
       if (header_bytes_read_ == TAR_HEADER_SIZE) {
-        const char *filename = (const char *)header_;
-
-        // The tar format stupidly represents size_teger values as
-        // octal strings!!
+        // The tar format stupidly represents size_t values as octal strings!!
         size_t file_size = 0;
-        sscanf(header_ + 124, "%o", &file_size);
-
-        // Only callback client if this is a "real" header
-        // (filename is not NULL)
-        // Extra zero-padding can be added by the gzip compression
-        // (at end of archive), so ignore these ones.
-        //
-        // Also, ignore entries for directories (which have zero size)
-        if (header_[0] != 0 && file_size > 0) {
-          ArchiveFileInfo info(filename, file_size);
-          callback_client_->ReceiveFileHeader(info);
-        } else if (header_[0] == 0) {
-          // If filename is NULL due to zero-padding then file size
-          // should also be NULL
-          assert(file_size == 0);
+        sscanf(header_ + kFileSizeOffset, "%o", &file_size);
+
+        // Check if it's a long filename
+        char type = header_[kLinkFlagOffset];
+        if (type == 'L') {
+          getting_filename_ = true;
+          // Reject long filenames larger than an arbitrary sanity limit.
+          if (file_size > 1024) {
+            return -1;
+          }
+        } else {
+          getting_filename_ = false;
+          const char *filename = (const char *)header_;
+          if (!file_name_.empty()) {
+            filename = file_name_.c_str();
+          }
+
+          // Only callback client if this is a "real" header
+          // (filename is not NULL)
+          // Extra zero-padding can be added by the gzip compression
+          // (at end of archive), so ignore these ones.
+          //
+          // Also, ignore entries for directories (which have zero size)
+          if (header_[0] != 0 && file_size > 0) {
+            ArchiveFileInfo info(filename, file_size);
+            callback_client_->ReceiveFileHeader(info);
+          } else if (header_[0] == 0) {
+            // If filename is NULL due to zero-padding then file size
+            // should also be NULL
+            // TODO(gman): Won't this crash the plugin if I make a bad tar?
+            assert(file_size == 0);
+          }
         }
 
         // Round filesize up to nearest block size
@@ -89,6 +106,9 @@ int TarProcessor::ProcessBytes(MemoryReadStream *stream, size_t n) {
         // Our client doesn't want to be bothered with the block padding,
         // so only send him the actual file bytes
         client_file_bytes_to_read_ = file_size;
+
+        // Clear the file_name_ so we don't use it next time.
+        file_name_.clear();
       }
     }
 
@@ -105,9 +125,17 @@ int TarProcessor::ProcessBytes(MemoryReadStream *stream, size_t n) {
         size_t client_bytes_this_time =
             std::min(bytes_to_consume, client_file_bytes_to_read_);
 
-        if (!callback_client_->ReceiveFileData(&client_read_stream,
-                                               client_bytes_this_time)) {
-          return -1;
+        if (getting_filename_) {
+          String name_piece(
+              client_read_stream.GetDirectMemoryPointerAs<const char>(),
+              client_bytes_this_time);
+          client_read_stream.Skip(client_bytes_this_time);
+          file_name_ += name_piece;
+        } else {
+          if (!callback_client_->ReceiveFileData(&client_read_stream,
+                                                 client_bytes_this_time)) {
+            return -1;
+          }
         }
 
         client_file_bytes_to_read_ -= client_bytes_this_time;
diff --git a/o3d/import/cross/tar_processor.h b/o3d/import/cross/tar_processor.h
index e680e47..76976c3 100644
--- a/o3d/import/cross/tar_processor.h
+++ b/o3d/import/cross/tar_processor.h
@@ -47,6 +47,7 @@
 #define O3D_IMPORT_CROSS_TAR_PROCESSOR_H_
 
 #include "base/basictypes.h"
+#include "core/cross/types.h"
 #include "import/cross/memory_stream.h"
 #include "import/cross/archive_processor.h"
 
@@ -58,6 +59,7 @@ class TarProcessor : public StreamProcessor {
   explicit TarProcessor(ArchiveCallbackClient *callback_client)
       : callback_client_(callback_client),
         header_bytes_read_(0),
+        getting_filename_(false),
         file_bytes_to_read_(0) {}
 
   virtual ~TarProcessor() {}
@@ -72,7 +74,9 @@ class TarProcessor : public StreamProcessor {
 
   ArchiveCallbackClient  *callback_client_;
   size_t                  header_bytes_read_;
-  char                   header_[TAR_HEADER_SIZE];
+  char                    header_[TAR_HEADER_SIZE];
+  bool                    getting_filename_;
+  String                  file_name_;
 
   // Initialized to total number of file bytes,
   // including zero padding up to block size
diff --git a/o3d/import/cross/tar_processor_test.cc b/o3d/import/cross/tar_processor_test.cc
index 3e7afaa..1317042 100644
--- a/o3d/import/cross/tar_processor_test.cc
+++ b/o3d/import/cross/tar_processor_test.cc
@@ -42,17 +42,22 @@ class TarProcessorTest : public testing::Test {
 
 // We verify that the tar file contains exactly these filenames
 static const char *kFilename1 = "test/file1";
-static const char *kFilename2 = "test/file2";
-static const char *kFilename3 = "test/file3";
+static const char *kFilename2 =
+    "test/file1ThisIsAFilenameLongerThen100Chars"
+    "ThisIsAFilenameLongerThen100Chars"
+    "ThisIsAFilenameLongerThen100CharsThisIsAFilenameLongerThen100Chars";
+static const char *kFilename3 = "test/file2";
+static const char *kFilename4 = "test/file3";
 
 // With each file having these exact contents
-#define kFileContents1 "the cat in the hat\n"
-#define kFileContents2 "abracadabra\n"
-#define kFileContents3 "I think therefore I am\n"
 
 // we should receive these (and exactly these bytes in this order)
 static const char *kConcatenatedContents =
-    kFileContents1  kFileContents2  kFileContents3;
+  "the cat in the hat\n"       // file 1 contents.
+  "this file has a long name"  // file 2 contents.
+  "abracadabra\n"              // file 3 contents.
+  "I think therefore I am\n"   // file 4 contents.
+  "";                          // end
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 class TarTestClient : public ArchiveCallbackClient {
@@ -85,6 +90,9 @@ void TarTestClient::ReceiveFileHeader(const ArchiveFileInfo &file_info) {
     case 2:
       EXPECT_TRUE(!strcmp(kFilename3, file_info.GetFileName().c_str()));
       break;
+    case 3:
+      EXPECT_TRUE(!strcmp(kFilename4, file_info.GetFileName().c_str()));
+      break;
   }
 
   file_count_++;
author	gman@google.com <gman@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-21 20:41:15 +0000
committer	gman@google.com <gman@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-21 20:41:15 +0000
commit	41052c98c01f6575b436a478b49612722478db70 (patch)
tree	b69490176459e84859bfc4780113b6861c69b529 /o3d/import
parent	d1868637ceb9def78abc10b10c55f1642c6c6971 (diff)
download	chromium_src-41052c98c01f6575b436a478b49612722478db70.zip chromium_src-41052c98c01f6575b436a478b49612722478db70.tar.gz chromium_src-41052c98c01f6575b436a478b49612722478db70.tar.bz2