sync/internal_api/public/base/unique_position.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sync/internal_api/public/base/unique_position.h"

#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "sync/protocol/unique_position.pb.h"
#include "third_party/zlib/zlib.h"

namespace syncer {

const size_t UniquePosition::kSuffixLength = 28;
const size_t UniquePosition::kCompressBytesThreshold = 128;

// static.
bool UniquePosition::IsValidSuffix(const std::string& suffix) {
  // The suffix must be exactly the specified length, otherwise unique suffixes
  // are not sufficient to guarantee unique positions (because prefix + suffix
  // == p + refixsuffix).
  return suffix.length() == kSuffixLength;
}

// static.
bool UniquePosition::IsValidBytes(const std::string& bytes) {
  // The first condition ensures that our suffix uniqueness is sufficient to
  // guarantee position uniqueness.  Otherwise, it's possible the end of some
  // prefix + some short suffix == some long suffix.
  // The second condition ensures that FindSmallerWithSuffix can always return a
  // result.
  return bytes.length() >= kSuffixLength
      && bytes[bytes.length()-1] != 0;
}

// static.
UniquePosition UniquePosition::CreateInvalid() {
  UniquePosition pos;
  DCHECK(!pos.IsValid());
  return pos;
}

// static.
UniquePosition UniquePosition::FromProto(const sync_pb::UniquePosition& proto) {
  if (proto.has_value()) {
    return UniquePosition(proto.value());
  } else if (proto.has_compressed_value() && proto.has_uncompressed_length()) {
    uLongf uncompressed_len = proto.uncompressed_length();
    std::string uncompressed;

    uncompressed.resize(uncompressed_len);
    int result = uncompress(
        reinterpret_cast<Bytef*>(string_as_array(&uncompressed)),
        &uncompressed_len,
        reinterpret_cast<const Bytef*>(proto.compressed_value().data()),
        proto.compressed_value().size());
    if (result != Z_OK) {
      DLOG(ERROR) << "Unzip failed " << result;
      return UniquePosition::CreateInvalid();
    }
    if (uncompressed_len != proto.uncompressed_length()) {
      DLOG(ERROR)
          << "Uncompressed length " << uncompressed_len
          << " did not match specified length " << proto.uncompressed_length();
      return UniquePosition::CreateInvalid();
    }
    return UniquePosition(uncompressed);
  } else {
    return UniquePosition::CreateInvalid();
  }
}

// static.
UniquePosition UniquePosition::FromInt64(
    int64 x, const std::string& suffix) {
  uint64 y = static_cast<uint64>(x);
  y ^= 0x8000000000000000ULL; // Make it non-negative.
  std::string bytes(8, 0);
  for (int i = 7; i >= 0; --i) {
    bytes[i] = static_cast<uint8>(y);
    y >>= 8;
  }
  return UniquePosition(bytes, suffix);
}

// static.
UniquePosition UniquePosition::InitialPosition(
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  return UniquePosition(std::string(), suffix);
}

// static.
UniquePosition UniquePosition::Before(
    const UniquePosition& x,
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK(x.IsValid());
  const std::string& before = FindSmallerWithSuffix(x.bytes_, suffix);
  return UniquePosition(before, suffix);
}

// static.
UniquePosition UniquePosition::After(
    const UniquePosition& x,
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK(x.IsValid());
  const std::string& after = FindGreaterWithSuffix(x.bytes_, suffix);
  return UniquePosition(after, suffix);
}

// static.
UniquePosition UniquePosition::Between(
    const UniquePosition& before,
    const UniquePosition& after,
    const std::string& suffix) {
  DCHECK(before.IsValid());
  DCHECK(after.IsValid());
  DCHECK(before.LessThan(after));
  DCHECK(IsValidSuffix(suffix));
  const std::string& mid =
      FindBetweenWithSuffix(before.bytes_, after.bytes_, suffix);
  return UniquePosition(mid, suffix);
}

UniquePosition::UniquePosition() : is_valid_(false) {}

bool UniquePosition::LessThan(const UniquePosition& other) const {
  DCHECK(this->IsValid());
  DCHECK(other.IsValid());
  return bytes_ < other.bytes_;
}

bool UniquePosition::Equals(const UniquePosition& other) const {
  if (!this->IsValid() && !other.IsValid())
    return true;

  return bytes_ == other.bytes_;
}

void UniquePosition::ToProto(sync_pb::UniquePosition* proto) const {
  proto->Clear();
  if (bytes_.size() < kCompressBytesThreshold) {
    // If it's small, then just write it.  This is the common case.
    proto->set_value(bytes_);
  } else {
    // We've got a large one.  Compress it.
    proto->set_uncompressed_length(bytes_.size());
    std::string* compressed = proto->mutable_compressed_value();

    uLongf compressed_len = compressBound(bytes_.size());
    compressed->resize(compressed_len);
    int result = compress(reinterpret_cast<Bytef*>(string_as_array(compressed)),
             &compressed_len,
             reinterpret_cast<const Bytef*>(bytes_.data()),
             bytes_.size());
    if (result != Z_OK) {
      NOTREACHED() << "Failed to compress position: " << result;
      // Maybe we can write an uncompressed version?
      proto->Clear();
      proto->set_value(bytes_);
    } else if (compressed_len >= bytes_.size()) {
      // Oops, we made it bigger.  Just write the uncompressed version instead.
      proto->Clear();
      proto->set_value(bytes_);
    } else {
      // Success!  Don't forget to adjust the string's length.
      compressed->resize(compressed_len);
    }
  }
}

void UniquePosition::SerializeToString(std::string* blob) const {
  DCHECK(blob);
  sync_pb::UniquePosition proto;
  ToProto(&proto);
  proto.SerializeToString(blob);
}

int64 UniquePosition::ToInt64() const {
  uint64 y = 0;
  const std::string& s = bytes_;
  size_t l = sizeof(int64);
  if (s.length() < l) {
    NOTREACHED();
    l = s.length();
  }
  for (size_t i = 0; i < l; ++i) {
    const uint8 byte = s[l - i - 1];
    y |= static_cast<uint64>(byte) << (i * 8);
  }
  y ^= 0x8000000000000000ULL;
  // This is technically implementation-defined if y > INT64_MAX, so
  // we're assuming that we're on a twos-complement machine.
  return static_cast<int64>(y);
}

bool UniquePosition::IsValid() const {
  return is_valid_;
}

std::string UniquePosition::ToDebugString() const {
  if (bytes_.empty())
    return std::string("INVALID[]");

  std::string debug_string = base::HexEncode(bytes_.data(), bytes_.length());
  if (!IsValid()) {
    debug_string = "INVALID[" + debug_string + "]";
  }
  return debug_string;;
}

std::string UniquePosition::GetSuffixForTest() const {
  const size_t prefix_len = bytes_.length() - kSuffixLength;
  return bytes_.substr(prefix_len, std::string::npos);
}

std::string UniquePosition::FindSmallerWithSuffix(
    const std::string& reference,
    const std::string& suffix) {
  size_t ref_zeroes = reference.find_first_not_of('\0');
  size_t suffix_zeroes = suffix.find_first_not_of('\0');

  // Neither of our inputs are allowed to have trailing zeroes, so the following
  // must be true.
  DCHECK_NE(ref_zeroes, std::string::npos);
  DCHECK_NE(suffix_zeroes, std::string::npos);

  if (suffix_zeroes > ref_zeroes) {
    // Implies suffix < ref.
    return std::string();
  }

  if (suffix.substr(suffix_zeroes) < reference.substr(ref_zeroes)) {
    // Prepend zeroes so the result has as many zero digits as |reference|.
    return std::string(ref_zeroes - suffix_zeroes, '\0');
  } else if (suffix_zeroes > 1) {
    // Prepend zeroes so the result has one more zero digit than |reference|.
    // We could also take the "else" branch below, but taking this branch will
    // give us a smaller result.
    return std::string(ref_zeroes - suffix_zeroes + 1, '\0');
  } else {
    // Prepend zeroes to match those in the |reference|, then something smaller
    // than the first non-zero digit in |reference|.
    char lt_digit = static_cast<uint8>(reference[ref_zeroes])/2;
    return std::string(ref_zeroes, '\0') + lt_digit;
  }
}

// static
std::string UniquePosition::FindGreaterWithSuffix(
    const std::string& reference,
    const std::string& suffix) {
  size_t ref_FFs = reference.find_first_not_of(kuint8max);
  size_t suffix_FFs = suffix.find_first_not_of(kuint8max);

  if (ref_FFs == std::string::npos) {
    ref_FFs = reference.length();
  }
  if (suffix_FFs == std::string::npos) {
    suffix_FFs = suffix.length();
  }

  if (suffix_FFs > ref_FFs) {
    // Implies suffix > reference.
    return std::string();
  }

  if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) {
    // Prepend FF digits to match those in |reference|.
    return std::string(ref_FFs - suffix_FFs, kuint8max);
  } else if (suffix_FFs > 1) {
    // Prepend enough leading FF digits so result has one more of them than
    // |reference| does.  We could also take the "else" branch below, but this
    // gives us a smaller result.
    return std::string(ref_FFs - suffix_FFs + 1, kuint8max);
  } else {
    // Prepend FF digits to match those in |reference|, then something larger
    // than the first non-FF digit in |reference|.
    char gt_digit = static_cast<uint8>(reference[ref_FFs]) +
        (kuint8max - static_cast<uint8>(reference[ref_FFs]) + 1) / 2;
    return std::string(ref_FFs, kuint8max) + gt_digit;
  }
}

// TODO(rlarocque): Is there a better algorithm that we could use here?
// static
std::string UniquePosition::FindBetweenWithSuffix(
    const std::string& before,
    const std::string& after,
    const std::string& suffix) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK_NE(before, after);
  DCHECK_LT(before, after);

  std::string mid;

  // Sometimes our suffix puts us where we want to be.
  if (before < suffix && suffix < after) {
    return std::string();
  }

  size_t i = 0;
  for ( ; i < std::min(before.length(), after.length()); ++i) {
    uint8 a_digit = before[i];
    uint8 b_digit = after[i];

    if (b_digit - a_digit >= 2) {
      mid.push_back(a_digit + (b_digit - a_digit)/2);
      return mid;
    } else if (a_digit == b_digit) {
      mid.push_back(a_digit);

      // Both strings are equal so far.  Will appending the suffix at this point
      // give us the comparison we're looking for?
      if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) {
        return mid;
      }
    } else {
      DCHECK_EQ(b_digit - a_digit, 1);  // Implied by above if branches.

      // The two options are off by one digit.  The choice of whether to round
      // up or down here will have consequences on what we do with the remaining
      // digits.  Exploring both options is an optimization and is not required
      // for the correctness of this algorithm.

      // Option A: Round down the current digit.  This makes our |mid| <
      // |after|, no matter what we append afterwards.  We then focus on
      // appending digits until |mid| > |before|.
      std::string mid_a = mid;
      mid_a.push_back(a_digit);
      mid_a.append(FindGreaterWithSuffix(before.substr(i+1), suffix));

      // Option B: Round up the current digit.  This makes our |mid| > |before|,
      // no matter what we append afterwards.  We then focus on appending digits
      // until |mid| < |after|.  Note that this option may not be viable if the
      // current digit is the last one in |after|, so we skip the option in that
      // case.
      if (after.length() > i+1) {
        std::string mid_b = mid;
        mid_b.push_back(b_digit);
        mid_b.append(FindSmallerWithSuffix(after.substr(i+1), suffix));

        // Does this give us a shorter position value?  If so, use it.
        if (mid_b.length() < mid_a.length()) {
          return mid_b;
        }
      }
      return mid_a;
    }
  }

  // If we haven't found a midpoint yet, the following must be true:
  DCHECK_EQ(before.substr(0, i), after.substr(0, i));
  DCHECK_EQ(before, mid);
  DCHECK_LT(before.length(), after.length());

  // We know that we'll need to append at least one more byte to |mid| in the
  // process of making it < |after|.  Appending any digit, regardless of the
  // value, will make |before| < |mid|.  Therefore, the following will get us a
  // valid position.

  mid.append(FindSmallerWithSuffix(after.substr(i), suffix));
  return mid;
}

UniquePosition::UniquePosition(const std::string& internal_rep)
    : bytes_(internal_rep),
      is_valid_(IsValidBytes(bytes_)) {
}

UniquePosition::UniquePosition(
    const std::string& prefix,
    const std::string& suffix)
  : bytes_(prefix + suffix),
    is_valid_(IsValidBytes(bytes_)) {
  DCHECK(IsValidSuffix(suffix));
  DCHECK(IsValid());
}

}  // namespace syncer