diff options
Diffstat (limited to 'src/google/protobuf/stubs/strutil.cc')
-rw-r--r-- | src/google/protobuf/stubs/strutil.cc | 231 |
1 files changed, 172 insertions, 59 deletions
diff --git a/src/google/protobuf/stubs/strutil.cc b/src/google/protobuf/stubs/strutil.cc index bb658ba..814d1b9 100644 --- a/src/google/protobuf/stubs/strutil.cc +++ b/src/google/protobuf/stubs/strutil.cc @@ -189,6 +189,44 @@ void SplitStringUsing(const string& full, SplitStringToIteratorUsing(full, delim, it); } +// Split a string using a character delimiter. Append the components +// to 'result'. If there are consecutive delimiters, this function +// will return corresponding empty strings. The string is split into +// at most the specified number of pieces greedily. This means that the +// last piece may possibly be split further. To split into as many pieces +// as possible, specify 0 as the number of pieces. +// +// If "full" is the empty string, yields an empty string as the only value. +// +// If "pieces" is negative for some reason, it returns the whole string +// ---------------------------------------------------------------------- +template <typename StringType, typename ITR> +static inline +void SplitStringToIteratorAllowEmpty(const StringType& full, + const char* delim, + int pieces, + ITR& result) { + string::size_type begin_index, end_index; + begin_index = 0; + + for (int i = 0; (i < pieces-1) || (pieces == 0); i++) { + end_index = full.find_first_of(delim, begin_index); + if (end_index == string::npos) { + *result++ = full.substr(begin_index); + return; + } + *result++ = full.substr(begin_index, (end_index - begin_index)); + begin_index = end_index + 1; + } + *result++ = full.substr(begin_index); +} + +void SplitStringAllowEmpty(const string& full, const char* delim, + vector<string>* result) { + back_insert_iterator<vector<string> > it(*result); + SplitStringToIteratorAllowEmpty(full, delim, 0, it); +} + // ---------------------------------------------------------------------- // JoinStrings() // This merges a vector of string components with delim inserted @@ -558,6 +596,120 @@ uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { return static_cast<uint32>(result); } +inline bool safe_parse_sign(string* text /*inout*/, + bool* negative_ptr /*output*/) { + const char* start = text->data(); + const char* end = start + text->size(); + + // Consume whitespace. + while (start < end && (start[0] == ' ')) { + ++start; + } + while (start < end && (end[-1] == ' ')) { + --end; + } + if (start >= end) { + return false; + } + + // Consume sign. + *negative_ptr = (start[0] == '-'); + if (*negative_ptr || start[0] == '+') { + ++start; + if (start >= end) { + return false; + } + } + *text = text->substr(start - text->data(), end - start); + return true; +} + +inline bool safe_parse_positive_int( + string text, int32* value_p) { + int base = 10; + int32 value = 0; + const int32 vmax = std::numeric_limits<int32>::max(); + assert(vmax > 0); + assert(vmax >= base); + const int32 vmax_over_base = vmax / base; + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = c - '0'; + if (digit >= base || digit < 0) { + *value_p = value; + return false; + } + if (value > vmax_over_base) { + *value_p = vmax; + return false; + } + value *= base; + if (value > vmax - digit) { + *value_p = vmax; + return false; + } + value += digit; + } + *value_p = value; + return true; +} + +inline bool safe_parse_negative_int( + string text, int32* value_p) { + int base = 10; + int32 value = 0; + const int32 vmin = std::numeric_limits<int32>::min(); + assert(vmin < 0); + assert(vmin <= 0 - base); + int32 vmin_over_base = vmin / base; + // 2003 c++ standard [expr.mul] + // "... the sign of the remainder is implementation-defined." + // Although (vmin/base)*base + vmin%base is always vmin. + // 2011 c++ standard tightens the spec but we cannot rely on it. + if (vmin % base > 0) { + vmin_over_base += 1; + } + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = c - '0'; + if (digit >= base || digit < 0) { + *value_p = value; + return false; + } + if (value < vmin_over_base) { + *value_p = vmin; + return false; + } + value *= base; + if (value < vmin + digit) { + *value_p = vmin; + return false; + } + value -= digit; + } + *value_p = value; + return true; +} + +bool safe_int(string text, int32* value_p) { + *value_p = 0; + bool negative; + if (!safe_parse_sign(&text, &negative)) { + return false; + } + if (!negative) { + return safe_parse_positive_int(text, value_p); + } else { + return safe_parse_negative_int(text, value_p); + } +} + // ---------------------------------------------------------------------- // FastIntToBuffer() // FastInt64ToBuffer() @@ -670,7 +822,14 @@ char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { static const char *hexdigits = "0123456789abcdef"; buffer[num_byte] = '\0'; for (int i = num_byte - 1; i >= 0; i--) { +#ifdef _M_X64 + // MSVC x64 platform has a bug optimizing the uint32(value) in the #else + // block. Given that the uint32 cast was to improve performance on 32-bit + // platforms, we use 64-bit '&' directly. + buffer[i] = hexdigits[value & 0xf]; +#else buffer[i] = hexdigits[uint32(value) & 0xf]; +#endif value >>= 4; } return buffer; @@ -1098,68 +1257,22 @@ char* FloatToBuffer(float value, char* buffer) { return buffer; } -// ---------------------------------------------------------------------- -// NoLocaleStrtod() -// This code will make you cry. -// ---------------------------------------------------------------------- - -// Returns a string identical to *input except that the character pointed to -// by radix_pos (which should be '.') is replaced with the locale-specific -// radix character. -string LocalizeRadix(const char* input, const char* radix_pos) { - // Determine the locale-specific radix character by calling sprintf() to - // print the number 1.5, then stripping off the digits. As far as I can - // tell, this is the only portable, thread-safe way to get the C library - // to divuldge the locale's radix character. No, localeconv() is NOT - // thread-safe. - char temp[16]; - int size = sprintf(temp, "%.1f", 1.5); - GOOGLE_CHECK_EQ(temp[0], '1'); - GOOGLE_CHECK_EQ(temp[size-1], '5'); - GOOGLE_CHECK_LE(size, 6); - - // Now replace the '.' in the input with it. - string result; - result.reserve(strlen(input) + size - 3); - result.append(input, radix_pos); - result.append(temp + 1, size - 2); - result.append(radix_pos + 1); - return result; -} +string ToHex(uint64 num) { + if (num == 0) { + return string("0"); + } -double NoLocaleStrtod(const char* text, char** original_endptr) { - // We cannot simply set the locale to "C" temporarily with setlocale() - // as this is not thread-safe. Instead, we try to parse in the current - // locale first. If parsing stops at a '.' character, then this is a - // pretty good hint that we're actually in some other locale in which - // '.' is not the radix character. - - char* temp_endptr; - double result = strtod(text, &temp_endptr); - if (original_endptr != NULL) *original_endptr = temp_endptr; - if (*temp_endptr != '.') return result; - - // Parsing halted on a '.'. Perhaps we're in a different locale? Let's - // try to replace the '.' with a locale-specific radix character and - // try again. - string localized = LocalizeRadix(text, temp_endptr); - const char* localized_cstr = localized.c_str(); - char* localized_endptr; - result = strtod(localized_cstr, &localized_endptr); - if ((localized_endptr - localized_cstr) > - (temp_endptr - text)) { - // This attempt got further, so replacing the decimal must have helped. - // Update original_endptr to point at the right location. - if (original_endptr != NULL) { - // size_diff is non-zero if the localized radix has multiple bytes. - int size_diff = localized.size() - strlen(text); - // const_cast is necessary to match the strtod() interface. - *original_endptr = const_cast<char*>( - text + (localized_endptr - localized_cstr - size_diff)); - } + // Compute hex bytes in reverse order, writing to the back of the + // buffer. + char buf[16]; // No more than 16 hex digits needed. + char* bufptr = buf + 16; + static const char kHexChars[] = "0123456789abcdef"; + while (num != 0) { + *--bufptr = kHexChars[num & 0xf]; + num >>= 4; } - return result; + return string(bufptr, buf + 16 - bufptr); } } // namespace protobuf |