summaryrefslogtreecommitdiffstats
path: root/courgette/ensemble_create.cc
blob: c098710fe27aabcf0a3fafa2b77f63679b58ff66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The main idea in Courgette is to do patching *under a tranformation*.  The
// input is transformed into a new representation, patching occurs in the new
// repesentation, and then the tranform is reversed to get the patched data.
//
// The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
// Each of the elements has to go through the same set of steps in lock-step.

// This file contains the code to create the patch.


#include "courgette/ensemble.h"

#include <vector>
#include <limits>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/time.h"

#include "courgette/third_party/bsdiff.h"
#include "courgette/crc.h"
#include "courgette/difference_estimator.h"
#include "courgette/streams.h"
#include "courgette/region.h"
#include "courgette/simple_delta.h"

#include "courgette/patcher_x86_32.h"
#include "courgette/patch_generator_x86_32.h"

namespace courgette {

TransformationPatchGenerator::TransformationPatchGenerator(
    Element* old_element,
    Element* new_element,
    TransformationPatcher* patcher)
    : old_element_(old_element),
      new_element_(new_element),
      patcher_(patcher) {
}

TransformationPatchGenerator::~TransformationPatchGenerator() {
  delete patcher_;
}

// The default implementation of PredictTransformParameters delegates to the
// patcher.
Status TransformationPatchGenerator::PredictTransformParameters(
    SinkStreamSet* prediction) {
  return patcher_->PredictTransformParameters(prediction);
}

// The default implementation of Reform delegates to the patcher.
Status TransformationPatchGenerator::Reform(
    SourceStreamSet* transformed_element,
    SinkStream* reformed_element) {
  return patcher_->Reform(transformed_element, reformed_element);
}

// Makes a TransformationPatchGenerator of the appropriate variety for the
// Element kind.
TransformationPatchGenerator* MakeGenerator(Element* old_element,
                                            Element* new_element) {
  switch (new_element->kind()) {
    case EXE_UNKNOWN:
      break;
    case EXE_WIN_32_X86: {
      TransformationPatchGenerator* generator =
          new PatchGeneratorX86_32(
              old_element,
              new_element,
              new PatcherX86_32(old_element->region()),
              EXE_WIN_32_X86);
      return generator;
    }
    case EXE_ELF_32_X86: {
      TransformationPatchGenerator* generator =
          new PatchGeneratorX86_32(
              old_element,
              new_element,
              new PatcherX86_32(old_element->region()),
              EXE_ELF_32_X86);
      return generator;
    }
  }

  LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
  return NULL;
}

// Checks to see if the proposed comparison is 'unsafe'.  Sometimes one element
// from 'old' is matched as the closest element to multiple elements from 'new'.
// Each time this happens, the old element is transformed and serialized.  This
// is a problem when the old element is huge compared with the new element
// because the mutliple serialized copies can be much bigger than the size of
// either ensemble.
//
// The right way to avoid this is to ensure any one element from 'old' is
// serialized once, which requires matching code in the patch application.
//
// This is a quick hack to avoid the problem by prohibiting a big difference in
// size between matching elements.
bool UnsafeDifference(Element* old_element, Element* new_element) {
  double kMaxBloat = 2.0;
  size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
  size_t old_size = old_element->region().length();
  size_t new_size = new_element->region().length();
  size_t low_size = std::min(old_size, new_size);
  size_t high_size = std::max(old_size, new_size);
  if (high_size - low_size < kMinWorrysomeDifference) return false;
  if (high_size < low_size * kMaxBloat) return false;
  return true;
}

// FindGenerators finds TransformationPatchGenerators for the elements of
// |new_ensemble|.  For each element of |new_ensemble| we find the closest
// matching element from |old_ensemble| and use that as the basis for
// differential compression.  The elements have to be the same kind so as to
// support transformation into the same kind of 'new representation'.
//
Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
                      std::vector<TransformationPatchGenerator*>* generators) {
  base::Time start_find_time = base::Time::Now();
  old_ensemble->FindEmbeddedElements();
  new_ensemble->FindEmbeddedElements();
  VLOG(1) << "done FindEmbeddedElements "
          << (base::Time::Now() - start_find_time).InSecondsF();

  std::vector<Element*> old_elements(old_ensemble->elements());
  std::vector<Element*> new_elements(new_ensemble->elements());

  VLOG(1) << "old has " << old_elements.size() << " elements";
  VLOG(1) << "new has " << new_elements.size() << " elements";

  DifferenceEstimator difference_estimator;
  std::vector<DifferenceEstimator::Base*> bases;

  base::Time start_bases_time = base::Time::Now();
  for (size_t i = 0;  i < old_elements.size();  ++i) {
    bases.push_back(
        difference_estimator.MakeBase(old_elements[i]->region()));
  }
  VLOG(1) << "done make bases "
          << (base::Time::Now() - start_bases_time).InSecondsF() << "s";

  for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
    Element* new_element = new_elements[new_index];
    DifferenceEstimator::Subject* new_subject =
        difference_estimator.MakeSubject(new_element->region());

    // Search through old elements to find the best match.
    //
    // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
    // new_ensemble probably have a very similar structure.  We can make the
    // search faster by making the comparison provided by DifferenceEstimator
    // more nuanced, returning early if the measured difference is greater than
    // the current best.  This will be most effective if we can arrange that the
    // first elements we try to match are likely the 'right' ones.  We could
    // prioritize elements that are of a similar size or similar position in the
    // sequence of elements.
    //
    Element* best_old_element = NULL;
    size_t best_difference = std::numeric_limits<size_t>::max();
    for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
      Element* old_element = old_elements[old_index];
      // Elements of different kinds are incompatible.
      if (old_element->kind() != new_element->kind())
        continue;

      if (UnsafeDifference(old_element, new_element))
        continue;

      base::Time start_compare = base::Time::Now();
      DifferenceEstimator::Base* old_base = bases[old_index];
      size_t difference = difference_estimator.Measure(old_base, new_subject);

      VLOG(1) << "Compare " << old_element->Name()
              << " to " << new_element->Name()
              << " --> " << difference
              << " in " << (base::Time::Now() - start_compare).InSecondsF()
              << "s";
      if (difference == 0) {
        VLOG(1) << "Skip " << new_element->Name()
                << " - identical to " << old_element->Name();
        best_difference = 0;
        best_old_element = NULL;
        break;
      }
      if (difference < best_difference) {
        best_difference = difference;
        best_old_element = old_element;
      }
    }

    if (best_old_element) {
      VLOG(1) << "Matched " << best_old_element->Name()
              << " to " << new_element->Name()
              << " --> " << best_difference;
      TransformationPatchGenerator* generator =
          MakeGenerator(best_old_element, new_element);
      if (generator)
        generators->push_back(generator);
    }
  }

  VLOG(1) << "done FindGenerators found " << generators->size()
          << " in " << (base::Time::Now() - start_find_time).InSecondsF()
          << "s";

  return C_OK;
}

void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
  for (size_t i = 0;  i < generators->size();  ++i) {
    delete (*generators)[i];
  }
  generators->clear();
}

////////////////////////////////////////////////////////////////////////////////

Status GenerateEnsemblePatch(SourceStream* base,
                             SourceStream* update,
                             SinkStream* final_patch) {
  VLOG(1) << "start GenerateEnsemblePatch";
  base::Time start_time = base::Time::Now();

  Region old_region(base->Buffer(), base->Remaining());
  Region new_region(update->Buffer(), update->Remaining());
  Ensemble old_ensemble(old_region, "old");
  Ensemble new_ensemble(new_region, "new");
  std::vector<TransformationPatchGenerator*> generators;
  Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
                                            &generators);
  if (generators_status != C_OK)
    return generators_status;

  SinkStreamSet patch_streams;

  SinkStream* tranformation_descriptions      = patch_streams.stream(0);
  SinkStream* parameter_correction            = patch_streams.stream(1);
  SinkStream* transformed_elements_correction = patch_streams.stream(2);
  SinkStream* ensemble_correction             = patch_streams.stream(3);

  size_t number_of_transformations = generators.size();
  if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
    return C_STREAM_ERROR;

  for (size_t i = 0;  i < number_of_transformations;  ++i) {
    ExecutableType kind = generators[i]->Kind();
    if (!tranformation_descriptions->WriteVarint32(kind))
      return C_STREAM_ERROR;
  }

  for (size_t i = 0;  i < number_of_transformations;  ++i) {
    Status status =
        generators[i]->WriteInitialParameters(tranformation_descriptions);
    if (status != C_OK)
      return status;
  }

  //
  // Generate sub-patch for parameters.
  //
  SinkStreamSet predicted_parameters_sink;
  SinkStreamSet corrected_parameters_sink;

  for (size_t i = 0;  i < number_of_transformations;  ++i) {
    SinkStreamSet single_predicted_parameters;
    Status status;
    status = generators[i]->PredictTransformParameters(
        &single_predicted_parameters);
    if (status != C_OK)
      return status;
    if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
      return C_STREAM_ERROR;

    SinkStreamSet single_corrected_parameters;
    status = generators[i]->CorrectedTransformParameters(
        &single_corrected_parameters);
    if (status != C_OK)
      return status;
    if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
      return C_STREAM_ERROR;
  }

  SinkStream linearized_predicted_parameters;
  SinkStream linearized_corrected_parameters;

  if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
    return C_STREAM_ERROR;
  if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
    return C_STREAM_ERROR;

  SourceStream predicted_parameters_source;
  SourceStream corrected_parameters_source;
  predicted_parameters_source.Init(linearized_predicted_parameters);
  corrected_parameters_source.Init(linearized_corrected_parameters);

  Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
                                             &corrected_parameters_source,
                                             parameter_correction);
  if (delta1_status != C_OK)
    return delta1_status;

  //
  // Generate sub-patch for elements.
  //
  corrected_parameters_source.Init(linearized_corrected_parameters);
  SourceStreamSet corrected_parameters_source_set;
  if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
    return C_STREAM_ERROR;

  SinkStreamSet predicted_transformed_elements;
  SinkStreamSet corrected_transformed_elements;

  for (size_t i = 0;  i < number_of_transformations;  ++i) {
    SourceStreamSet single_parameters;
    if (!corrected_parameters_source_set.ReadSet(&single_parameters))
      return C_STREAM_ERROR;
    SinkStreamSet single_predicted_transformed_element;
    SinkStreamSet single_corrected_transformed_element;
    Status status = generators[i]->Transform(
        &single_parameters,
        &single_predicted_transformed_element,
        &single_corrected_transformed_element);
    if (status != C_OK)
      return status;
    if (!single_parameters.Empty())
      return C_STREAM_NOT_CONSUMED;
    if (!predicted_transformed_elements.WriteSet(
            &single_predicted_transformed_element))
      return C_STREAM_ERROR;
    if (!corrected_transformed_elements.WriteSet(
            &single_corrected_transformed_element))
      return C_STREAM_ERROR;
  }

  if (!corrected_parameters_source_set.Empty())
    return C_STREAM_NOT_CONSUMED;

  SinkStream linearized_predicted_transformed_elements;
  SinkStream linearized_corrected_transformed_elements;

  if (!predicted_transformed_elements.CopyTo(
          &linearized_predicted_transformed_elements))
    return C_STREAM_ERROR;
  if (!corrected_transformed_elements.CopyTo(
          &linearized_corrected_transformed_elements))
    return C_STREAM_ERROR;

  SourceStream predicted_transformed_elements_source;
  SourceStream corrected_transformed_elements_source;
  predicted_transformed_elements_source
      .Init(linearized_predicted_transformed_elements);
  corrected_transformed_elements_source
      .Init(linearized_corrected_transformed_elements);

  Status delta2_status =
      GenerateSimpleDelta(&predicted_transformed_elements_source,
                          &corrected_transformed_elements_source,
                          transformed_elements_correction);
  if (delta2_status != C_OK)
    return delta2_status;

  // Last use, free storage.
  linearized_predicted_transformed_elements.Retire();

  //
  // Generate sub-patch for whole enchilada.
  //
  SinkStream predicted_ensemble;

  if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
    return C_STREAM_ERROR;

  SourceStreamSet corrected_transformed_elements_source_set;
  corrected_transformed_elements_source
      .Init(linearized_corrected_transformed_elements);
  if (!corrected_transformed_elements_source_set
      .Init(&corrected_transformed_elements_source))
    return C_STREAM_ERROR;

  for (size_t i = 0;  i < number_of_transformations;  ++i) {
    SourceStreamSet single_corrected_transformed_element;
    if (!corrected_transformed_elements_source_set.ReadSet(
            &single_corrected_transformed_element))
      return C_STREAM_ERROR;
    Status status = generators[i]->Reform(&single_corrected_transformed_element,
                                          &predicted_ensemble);
    if (status != C_OK)
      return status;
    if (!single_corrected_transformed_element.Empty())
      return C_STREAM_NOT_CONSUMED;
  }

  if (!corrected_transformed_elements_source_set.Empty())
    return C_STREAM_NOT_CONSUMED;

  // No more references to this stream's buffer.
  linearized_corrected_transformed_elements.Retire();

  FreeGenerators(&generators);

  size_t final_patch_input_size = predicted_ensemble.Length();
  SourceStream predicted_ensemble_source;
  predicted_ensemble_source.Init(predicted_ensemble);
  Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
                                             update,
                                             ensemble_correction);
  if (delta3_status != C_OK)
    return delta3_status;

  //
  // Final output stream has a header followed by a StreamSet.
  //
  if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
      !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
      !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
                                               old_region.length())) ||
      !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
                                               new_region.length())) ||
      !final_patch->WriteSizeVarint32(final_patch_input_size) ||
      !patch_streams.CopyTo(final_patch)) {
    return C_STREAM_ERROR;
  }

  VLOG(1) << "done GenerateEnsemblePatch "
          << (base::Time::Now() - start_time).InSecondsF() << "s";

  return C_OK;
}

}  // namespace