summaryrefslogtreecommitdiffstats
path: root/content/browser/download/save_package.h
blob: 906091632a72d82c1491833ce3725e4c2ccb7513 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
#define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_

#include <queue>
#include <set>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/file_path.h"
#include "base/gtest_prod_util.h"
#include "base/hash_tables.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/time.h"
#include "content/common/content_export.h"
#include "content/public/browser/download_item.h"
#include "content/public/browser/download_manager_delegate.h"
#include "content/public/browser/save_page_type.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/common/referrer.h"
#include "googleurl/src/gurl.h"

class GURL;

namespace content {
class DownloadItemImpl;
class DownloadManagerImpl;
class WebContents;
class SaveFileManager;
class SaveItem;
class SavePackage;
struct SaveFileCreateInfo;

// The SavePackage object manages the process of saving a page as only-html or
// complete-html or MHTML and providing the information for displaying saving
// status.  Saving page as only-html means means that we save web page to a
// single HTML file regardless internal sub resources and sub frames.  Saving
// page as complete-html page means we save not only the main html file the user
// told it to save but also a directory for the auxiliary files such as all
// sub-frame html files, image files, css files and js files.  Saving page as
// MHTML means the same thing as complete-html, but it uses the MHTML format to
// contain the html and all auxiliary files in a single text file.
//
// Each page saving job may include one or multiple files which need to be
// saved. Each file is represented by a SaveItem, and all SaveItems are owned
// by the SavePackage. SaveItems are created when a user initiates a page
// saving job, and exist for the duration of one contents's life time.
class CONTENT_EXPORT SavePackage
    : public base::RefCountedThreadSafe<SavePackage>,
      public WebContentsObserver,
      public DownloadItem::Observer,
      public base::SupportsWeakPtr<SavePackage> {
 public:
  enum WaitState {
    // State when created but not initialized.
    INITIALIZE = 0,
    // State when after initializing, but not yet saving.
    START_PROCESS,
    // Waiting on a list of savable resources from the backend.
    RESOURCES_LIST,
    // Waiting for data sent from net IO or from file system.
    NET_FILES,
    // Waiting for html DOM data sent from render process.
    HTML_DATA,
    // Saving page finished successfully.
    SUCCESSFUL,
    // Failed to save page.
    FAILED
  };

  static const FilePath::CharType kDefaultHtmlExtension[];

  // Constructor for user initiated page saving. This constructor results in a
  // SavePackage that will generate and sanitize a suggested name for the user
  // in the "Save As" dialog box.
  explicit SavePackage(WebContents* web_contents);

  // This contructor is used only for testing. We can bypass the file and
  // directory name generation / sanitization by providing well known paths
  // better suited for tests.
  SavePackage(WebContents* web_contents,
              SavePageType save_type,
              const FilePath& file_full_path,
              const FilePath& directory_full_path);

  // Initialize the SavePackage. Returns true if it initializes properly.  Need
  // to make sure that this method must be called in the UI thread because using
  // g_browser_process on a non-UI thread can cause crashes during shutdown.
  // |cb| will be called when the DownloadItem is created, before data is
  // written to disk.
  bool Init(const SavePackageDownloadCreatedCallback& cb);

  // Cancel all in progress request, might be called by user or internal error.
  void Cancel(bool user_action);

  void Finish();

  // Notifications sent from the file thread to the UI thread.
  void StartSave(const SaveFileCreateInfo* info);
  bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
  void SaveFinished(int32 save_id, int64 size, bool is_success);
  void SaveFailed(const GURL& save_url);
  void SaveCanceled(SaveItem* save_item);

  // Rough percent complete, -1 means we don't know (since we didn't receive a
  // total size).
  int PercentComplete();

  bool canceled() const { return user_canceled_ || disk_error_occurred_; }
  bool finished() const { return finished_; }
  SavePageType save_type() const { return save_type_; }
  int contents_id() const { return contents_id_; }
  int id() const { return unique_id_; }
  WebContents* web_contents() const;

  void GetSaveInfo();

 private:
  friend class base::RefCountedThreadSafe<SavePackage>;

  // Callback for WebContents::GenerateMHTML().
  void OnMHTMLGenerated(const FilePath& path, int64 size);

  // For testing only.
  SavePackage(WebContents* web_contents,
              const FilePath& file_full_path,
              const FilePath& directory_full_path);

  virtual ~SavePackage();

  // Notes from Init() above applies here as well.
  void InternalInit();

  void Stop();
  void CheckFinish();
  void SaveNextFile(bool process_all_remainder_items);
  void DoSavingProcess();

  // WebContentsObserver implementation.
  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;

  // DownloadItem::Observer implementation.
  virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE;

  // Update the download history of this item upon completion.
  void FinalizeDownloadEntry();

  // Detach from DownloadManager.
  void StopObservation();

  // Return max length of a path for a specific base directory.
  // This is needed on POSIX, which restrict the length of file names in
  // addition to the restriction on the length of path names.
  // |base_dir| is assumed to be a directory name with no trailing slash.
  static uint32 GetMaxPathLengthForDirectory(const FilePath& base_dir);

  static bool GetSafePureFileName(const FilePath& dir_path,
                                  const FilePath::StringType& file_name_ext,
                                  uint32 max_file_path_len,
                                  FilePath::StringType* pure_file_name);

  // Create a file name based on the response from the server.
  bool GenerateFileName(const std::string& disposition,
                        const GURL& url,
                        bool need_html_ext,
                        FilePath::StringType* generated_name);

  // Get all savable resource links from current web page, include main
  // frame and sub-frame.
  void GetAllSavableResourceLinksForCurrentPage();
  // Get html data by serializing all frames of current page with lists
  // which contain all resource links that have local copy.
  void GetSerializedHtmlDataForCurrentPageWithLocalLinks();

  // Look up SaveItem by save id from in progress map.
  SaveItem* LookupItemInProcessBySaveId(int32 save_id);

  // Remove SaveItem from in progress map and put it to saved map.
  void PutInProgressItemToSavedMap(SaveItem* save_item);

  // Retrieves the URL to be saved from the WebContents.
  GURL GetUrlToBeSaved();

  void CreateDirectoryOnFileThread(const FilePath& website_save_dir,
                                   const FilePath& download_save_dir,
                                   bool skip_dir_check,
                                   const std::string& mime_type,
                                   const std::string& accept_langs);
  void ContinueGetSaveInfo(const FilePath& suggested_path,
                           bool can_save_as_complete);
  void OnPathPicked(
      const FilePath& final_name,
      SavePageType type,
      const SavePackageDownloadCreatedCallback& cb);
  void OnReceivedSavableResourceLinksForCurrentPage(
      const std::vector<GURL>& resources_list,
      const std::vector<Referrer>& referrers_list,
      const std::vector<GURL>& frames_list);

  void OnReceivedSerializedHtmlData(const GURL& frame_url,
                                    const std::string& data,
                                    int32 status);

  typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
  // in_progress_items_ is map of all saving job in in-progress state.
  SaveUrlItemMap in_progress_items_;
  // saved_failed_items_ is map of all saving job which are failed.
  SaveUrlItemMap saved_failed_items_;

  // The number of in process SaveItems.
  int in_process_count() const {
    return static_cast<int>(in_progress_items_.size());
  }

  // The number of all SaveItems which have completed, including success items
  // and failed items.
  int completed_count() const {
    return static_cast<int>(saved_success_items_.size() +
                            saved_failed_items_.size());
  }

  // The current speed in files per second. This is used to update the
  // DownloadItem associated to this SavePackage. The files per second is
  // presented by the DownloadItem to the UI as bytes per second, which is
  // not correct but matches the way the total and received number of files is
  // presented as the total and received bytes.
  int64 CurrentSpeed() const;

  // Helper function for preparing suggested name for the SaveAs Dialog. The
  // suggested name is determined by the web document's title.
  FilePath GetSuggestedNameForSaveAs(
      bool can_save_as_complete,
      const std::string& contents_mime_type,
      const std::string& accept_langs);

  // Ensures that the file name has a proper extension for HTML by adding ".htm"
  // if necessary.
  static FilePath EnsureHtmlExtension(const FilePath& name);

  // Ensures that the file name has a proper extension for supported formats
  // if necessary.
  static FilePath EnsureMimeExtension(const FilePath& name,
      const std::string& contents_mime_type);

  // Returns extension for supported MIME types (for example, for "text/plain"
  // it returns "txt").
  static const FilePath::CharType* ExtensionForMimeType(
      const std::string& contents_mime_type);

  typedef std::queue<SaveItem*> SaveItemQueue;
  // A queue for items we are about to start saving.
  SaveItemQueue waiting_item_queue_;

  typedef base::hash_map<int32, SaveItem*> SavedItemMap;
  // saved_success_items_ is map of all saving job which are successfully saved.
  SavedItemMap saved_success_items_;

  // Non-owning pointer for handling file writing on the file thread.
  SaveFileManager* file_manager_;

  // DownloadManager owns the DownloadItem and handles history and UI.
  DownloadManagerImpl* download_manager_;
  DownloadItemImpl* download_;

  // The URL of the page the user wants to save.
  GURL page_url_;
  FilePath saved_main_file_path_;
  FilePath saved_main_directory_path_;

  // The title of the page the user wants to save.
  string16 title_;

  // Used to calculate package download speed (in files per second).
  base::TimeTicks start_tick_;

  // Indicates whether the actual saving job is finishing or not.
  bool finished_;

  // Indicates whether a call to Finish() has been scheduled.
  bool mhtml_finishing_;

  // Indicates whether user canceled the saving job.
  bool user_canceled_;

  // Indicates whether user get disk error.
  bool disk_error_occurred_;

  // Type about saving page as only-html or complete-html.
  SavePageType save_type_;

  // Number of all need to be saved resources.
  size_t all_save_items_count_;

  typedef std::set<FilePath::StringType,
                   bool (*)(const FilePath::StringType&,
                            const FilePath::StringType&)> FileNameSet;
  // This set is used to eliminate duplicated file names in saving directory.
  FileNameSet file_name_set_;

  typedef base::hash_map<FilePath::StringType, uint32> FileNameCountMap;
  // This map is used to track serial number for specified filename.
  FileNameCountMap file_name_count_map_;

  // Indicates current waiting state when SavePackage try to get something
  // from outside.
  WaitState wait_state_;

  // Since for one contents, it can only have one SavePackage in same time.
  // Now we actually use render_process_id as the contents's unique id.
  const int contents_id_;

  // Unique ID for this SavePackage.
  const int unique_id_;

  // Variables to record errors that happened so we can record them via
  // UMA statistics.
  bool wrote_to_completed_file_;
  bool wrote_to_failed_file_;

  friend class SavePackageTest;
  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);

  DISALLOW_COPY_AND_ASSIGN(SavePackage);
};

}  // namespace content

#endif  // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_