// path: root/chrome/browser/download/save_package.h
// blob: 7b6edcd7de67f976039ad9b4a7665a5cd7d3a90a
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// The SavePackage object manages the process of saving a page as only-html or
// complete-html and provides the information for displaying the saving status.
// Saving a page as only-html means that we save the web page to a single HTML
// file, regardless of its internal sub-resources and sub-frames.
// Saving a page as complete-html means we save not only the main html file
// the user told it to save, but also a directory for the auxiliary files such
// as all sub-frame html files, image files, css files and js files.
//
// Each page saving job may include one or multiple files which need to be
// saved. Each file is represented by a SaveItem, and all SaveItems are owned
// by the SavePackage. SaveItems are created when a user initiates a page
// saving job, and exist for the duration of one tab's life time.

#ifndef CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H__
#define CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H__

#include <string>
#include <vector>
#include <queue>
#include <utility>

#include "base/basictypes.h"
#include "base/hash_tables.h"
#include "base/ref_counted.h"
#include "base/time.h"
#include "chrome/common/pref_member.h"
#include "chrome/browser/download/save_item.h"
#include "chrome/browser/download/save_types.h"

// Forward declarations (alphabetical; each class is declared exactly once).
class DownloadItem;
class GURL;
class MessageLoop;
class PrefService;
class Profile;
class SaveFileManager;
class SavePackage;
class Time;
class URLRequestContext;
class WebContents;

namespace base {
class Thread;
}

// SavePackage manages all of the SaveItems that belong to one page-saving job.
class SavePackage : public base::RefCountedThreadSafe<SavePackage> {
 public:
  enum SavePackageType {
    // User chose to save only the HTML of the page.
    SAVE_AS_ONLY_HTML = 0,
    // User chose to save complete-html page.
    SAVE_AS_COMPLETE_HTML = 1
  };

  enum WaitState {
    // State when created but not initialized.
    INITIALIZE = 0,
    // State after initializing, but not yet saving.
    START_PROCESS,
    // Waiting on a list of savable resources from the backend.
    RESOURCES_LIST,
    // Waiting for data sent from net IO or from file system.
    NET_FILES,
    // Waiting for html DOM data sent from render process.
    HTML_DATA,
    // Saving page finished successfully.
    SUCCESSFUL,
    // Failed to save page.
    FAILED
  };

  // NOTE(review): |file_full_path| is presumably the path of the main html
  // file and |directory_full_path| the directory for sub resources (compare
  // SavePackageParam below) -- confirm against the implementation.
  SavePackage(WebContents* web_content,
              SavePackageType save_type,
              const std::wstring& file_full_path,
              const std::wstring& directory_full_path);

  ~SavePackage();

  // Initializes the SavePackage. Returns true if it initializes properly.
  // This method must be called on the UI thread because using
  // g_browser_process on a non-UI thread can cause crashes during shutdown.
  bool Init();

  // NOTE(review): |user_action| presumably tells whether the cancellation was
  // requested by the user -- confirm against the implementation.
  void Cancel(bool user_action);

  void Finish();

  // Notifications sent from the file thread to the UI thread.
  void StartSave(const SaveFileCreateInfo* info);
  bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
  void SaveFinished(int32 save_id, int64 size, bool is_success);
  void SaveFailed(const std::wstring& save_url);
  void SaveCanceled(SaveItem* save_item);

  // Processes all savable links of the current page: the sub resources, the
  // resources' referrers and the frames (including the main frame and sub
  // frames) gotten from the render process.
  void ProcessCurrentPageAllSavableResourceLinks(
      const std::vector<GURL>& resources_list,
      const std::vector<GURL>& referrers_list,
      const std::vector<GURL>& frames_list);

  // Processes the serialized html content data of a specified web page
  // gotten from the render process.
  void ProcessSerializedHtmlData(const GURL& frame_url,
                                 const std::string& data,
                                 int32 status);

  // Rough percent complete, -1 means we don't know (since we didn't receive a
  // total size).
  int PercentComplete();

  // Show or Open a saved page via the Windows shell.
  void ShowDownloadInShell();

  // True if the job stopped because the user canceled it or because a disk
  // error occurred.
  bool canceled() const { return user_canceled_ || disk_error_occurred_; }

  // Accessors.
  bool finished() const { return finished_; }
  SavePackageType save_type() const { return save_type_; }

  // One tab can only have one SavePackage at a time, so we currently use
  // render_process_id as the tab's unique id.
  int tab_id() const { return tab_id_; }

  // Helper function for preparing the suggested name for the SaveAs Dialog.
  // The suggested name is composed of the default save path and the web
  // document's title.
  static std::wstring GetSuggestNameForSaveAs(PrefService* prefs,
                                              const std::wstring& name);

  // This structure stores the parameters which we will use to create a
  // SavePackage object later.
  struct SavePackageParam {
    // MIME type of current tab contents. Held by value (a copy of the
    // constructor argument) so that the struct can never refer to a string
    // that has already been destroyed, e.g. a temporary.
    const std::string current_tab_mime_type;
    // Pointer to preference service.
    PrefService* prefs;
    // Type about saving page as only-html or complete-html.
    SavePackageType save_type;
    // File path for main html file.
    std::wstring saved_main_file_path;
    // Directory path for saving sub resources and sub html frames.
    std::wstring dir;

    // |prefs| starts out null and |save_type| starts out as
    // SAVE_AS_ONLY_HTML, so neither field is ever read uninitialized; callers
    // are expected to assign the real values afterwards.
    explicit SavePackageParam(const std::string& mime_type)
        : current_tab_mime_type(mime_type),
          prefs(),
          save_type(SAVE_AS_ONLY_HTML) { }
  };

  // NOTE(review): presumably asks the user where and how to save the page
  // (using |suggest_name| as the proposed file name and |container_hwnd| as
  // the parent window) and stores the answer in |param| -- confirm against
  // the implementation, including the meaning of the return value.
  static bool GetSaveInfo(const std::wstring& suggest_name,
                          HWND container_hwnd,
                          SavePackageParam* param);

  // A file name consists of a pure file name, a dot and a file extension
  // name. A file name might have no dot and file extension, or have multiple
  // dots inside it. The dot which separates the pure file name and the file
  // extension name is the last dot in the file name. If the file name matches
  // one of the following patterns:
  // base_file_name(ordinal_number) or base_file_name(ordinal_number).extension,
  // this function will return true and get the base file name part and
  // ordinal_number part via output parameters. The |file_ordinal_number| could
  // be empty if there is no content in ordinal_number part. If the file name
  // does not match the pattern or the ordinal_number part has non-digit
  // content, just return false.
  static bool GetBaseFileNameAndFileOrdinalNumber(
      const std::wstring& file_name,
      std::wstring* base_file_name,
      std::wstring* file_ordinal_number);

  // Check whether we can do the saving page operation for the specified URL.
  static bool IsSavableURL(const GURL& url);

  // Check whether we can do the saving page operation for the contents which
  // have the specified MIME type.
  static bool IsSavableContents(const std::string& contents_mime_type);

  // Check whether we can save page as complete-HTML for the contents which
  // have the specified MIME type. Now only contents which have the MIME type
  // "text/html" can be saved as complete-HTML.
  static bool CanSaveAsComplete(const std::string& contents_mime_type);

  // A file name is considered to consist of a pure file name, a dot and a
  // file extension name. A file name might have no dot and file extension, or
  // have multiple dots inside it. The dot which separates the pure file name
  // and the file extension name is the last dot in the whole file name.
  // This function makes sure the length of the specified file path is not
  // greater than the specified maximum length of a file path, and gets a safe
  // pure file name part if the input pure file name is too long.
  // The parameter |dir_path| specifies the directory part of the specified
  // file path. The parameter |file_name_ext| specifies the file extension
  // name part of the specified file path (including the leading dot). The
  // parameter |max_file_path_len| specifies the maximum length of the
  // specified file path. The parameter |pure_file_name| inputs the pure file
  // name part of the specified file path. If the length of the specified file
  // path is greater than |max_file_path_len|, |pure_file_name| will output a
  // new pure file name part which makes the length of the specified file path
  // less than the specified maximum length of a file path. Returns false if
  // the function can not get a safe pure file name, otherwise returns true.
  static bool GetSafePureFileName(const std::wstring& dir_path,
                                  const std::wstring& file_name_ext,
                                  uint32 max_file_path_len,
                                  std::wstring* pure_file_name);

 private:
  // For testing.
  friend class SavePackageTest;
  SavePackage(const wchar_t* file_full_path,
              const wchar_t* directory_full_path);

  void Stop();
  void CheckFinish();
  void SaveNextFile(bool process_all_remainder_items);
  void DoSavingProcess();

  // Create a file name based on the response from the server.
  bool GenerateFilename(const std::string& disposition,
                        const std::wstring& url,
                        bool need_html_ext,
                        std::wstring* generated_name);

  // Get all savable resource links from the current web page, including the
  // main frame and sub-frames.
  void GetAllSavableResourceLinksForCurrentPage();
  // Get html data by serializing all frames of the current page with lists
  // which contain all resource links that have a local copy.
  void GetSerializedHtmlDataForCurrentPageWithLocalLinks();

  SaveItem* LookupItemInProcessBySaveId(int32 save_id);
  void PutInProgressItemToSavedMap(SaveItem* save_item);

  typedef base::hash_map<std::wstring, SaveItem*> SaveUrlItemMap;
  // in_progress_items_ is a map of all saving jobs in the in-progress state.
  SaveUrlItemMap in_progress_items_;
  // saved_failed_items_ is a map of all saving jobs which have failed.
  SaveUrlItemMap saved_failed_items_;

  // The number of in-process SaveItems.
  int in_process_count() const {
    return static_cast<int>(in_progress_items_.size());
  }

  // The number of all SaveItems which have completed, including successful
  // items and failed items.
  int completed_count() const {
    return static_cast<int>(saved_success_items_.size() +
                            saved_failed_items_.size());
  }

  typedef std::queue<SaveItem*> SaveItemQueue;
  // A queue for items we are about to start saving.
  SaveItemQueue waiting_item_queue_;

  typedef base::hash_map<int32, SaveItem*> SavedItemMap;
  // saved_success_items_ is a map of all saving jobs which were successfully
  // saved.
  SavedItemMap saved_success_items_;

  // The request context which provides application-specific context for
  // URLRequest instances.
  scoped_refptr<URLRequestContext> request_context_;

  // Non-owning pointer for handling file writing on the file thread.
  SaveFileManager* file_manager_;

  WebContents* web_contents_;

  // We use a fake DownloadItem here in order to reuse the DownloadItemView.
  // This class owns the pointer.
  DownloadItem* download_;

  // The URL of the page the user wants to save.
  std::wstring page_url_;
  std::wstring saved_main_file_path_;
  std::wstring saved_main_directory_path_;

  // Indicates whether the actual saving job is finishing or not.
  bool finished_;

  // Indicates whether the user canceled the saving job.
  bool user_canceled_;

  // Indicates whether the user got a disk error.
  bool disk_error_occurred_;

  // Type about saving page as only-html or complete-html.
  SavePackageType save_type_;

  // Number of all resources which need to be saved.
  int all_save_items_count_;

  typedef base::hash_set<std::wstring> FileNameSet;
  // This set is used to eliminate duplicated file names in saving directory.
  FileNameSet file_name_set_;

  typedef base::hash_map<std::wstring, uint32> FileNameCountMap;
  // This map is used to track the serial number for a specified filename.
  FileNameCountMap file_name_count_map_;

  // Indicates the current waiting state when SavePackage tries to get
  // something from outside.
  WaitState wait_state_;

  // Unique id for this SavePackage.
  const int tab_id_;

  DISALLOW_EVIL_CONSTRUCTORS(SavePackage);
};

#endif  // CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H__