diff options
-rw-r--r-- | base/string_util.cc | 254 | ||||
-rw-r--r-- | base/string_util.h | 25 | ||||
-rw-r--r-- | base/string_util_mac.cc | 8 | ||||
-rw-r--r-- | base/string_util_unittest.cc | 182 | ||||
-rw-r--r-- | base/string_util_win.cc | 8 |
5 files changed, 460 insertions, 17 deletions
diff --git a/base/string_util.cc b/base/string_util.cc
index 2122b9f..3c7bb87 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -26,16 +26,24 @@
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 // StringPrintf stuff based on strings/stringprintf.cc by Sanjay Ghemawat
 
 #include "base/string_util.h"
 
-#include <algorithm>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
 #include <math.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <algorithm>
 #include <vector>
 
 #include "base/basictypes.h"
@@ -89,6 +97,139 @@ static bool CompareParameter(const ReplacementOffset& elem1,
   return elem1.parameter < elem2.parameter;
 }
 
+// Generalized string-to-number conversion.
+//
+// StringToNumberTraits should provide:
+// - a typedef for string_type, the STL string type used as input.
+// - a typedef for value_type, the target numeric type.
+// - a static function, convert_func, which dispatches to an appropriate
+//   strtol-like function and returns type value_type.
+// - a static function, valid_func, which validates |input| and returns a bool
+//   indicating whether it is in proper form. This is used to check for
+//   conditions that convert_func tolerates but should result in
+//   StringToNumber returning false. For strtol-like functions, valid_func
+//   should check for leading whitespace.
+template<typename StringToNumberTraits>
+bool StringToNumber(const typename StringToNumberTraits::string_type& input,
+                    typename StringToNumberTraits::value_type* output) {
+  typedef StringToNumberTraits traits;
+
+  errno = 0;  // Thread-safe? It is on at least Mac, Linux, and Windows.
+  typename traits::string_type::value_type* endptr = NULL;
+  typename traits::value_type value = traits::convert_func(input.c_str(),
+                                                           &endptr);
+  *output = value;
+
+  // Cases to return false:
+  // - If errno is ERANGE, there was an overflow or underflow.
+  // - If the input string is empty, there was nothing to parse.
+  // - If endptr does not point to the end of the string, there are either
+  //   characters remaining in the string after a parsed number, or the string
+  //   does not begin with a parseable number. endptr is compared to the
+  //   expected end given the string's stated length to correctly catch cases
+  //   where the string contains embedded NUL characters.
+  // - valid_func determines that the input is not in preferred form.
+  return errno == 0 &&
+         !input.empty() &&
+         input.c_str() + input.length() == endptr &&
+         traits::valid_func(input);
+}
+
+class StringToLongTraits {
+ public:
+  typedef std::string string_type;
+  typedef long value_type;
+  static const int kBase = 10;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+    return strtol(str, endptr, kBase);
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !isspace(static_cast<unsigned char>(str[0]));
+  }
+};
+
+class WStringToLongTraits {
+ public:
+  typedef std::wstring string_type;
+  typedef long value_type;
+  static const int kBase = 10;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+    return wcstol(str, endptr, kBase);
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !iswspace(str[0]);
+  }
+};
+
+class StringToInt64Traits {
+ public:
+  typedef std::string string_type;
+  typedef int64 value_type;
+  static const int kBase = 10;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+#ifdef OS_WIN
+    return _strtoi64(str, endptr, kBase);
+#else  // assume OS_POSIX
+    return strtoll(str, endptr, kBase);
+#endif
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !isspace(static_cast<unsigned char>(str[0]));
+  }
+};
+
+class WStringToInt64Traits {
+ public:
+  typedef std::wstring string_type;
+  typedef int64 value_type;
+  static const int kBase = 10;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+#ifdef OS_WIN
+    return _wcstoi64(str, endptr, kBase);
+#else  // assume OS_POSIX
+    return wcstoll(str, endptr, kBase);
+#endif
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !iswspace(str[0]);
+  }
+};
+
+// For the HexString variants, use the unsigned variants like strtoul for
+// convert_func so that input like "0x80000000" doesn't result in an overflow.
+
+class HexStringToLongTraits {
+ public:
+  typedef std::string string_type;
+  typedef long value_type;
+  static const int kBase = 16;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+    return static_cast<value_type>(strtoul(str, endptr, kBase));
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !isspace(static_cast<unsigned char>(str[0]));
+  }
+};
+
+class HexWStringToLongTraits {
+ public:
+  typedef std::wstring string_type;
+  typedef long value_type;
+  static const int kBase = 16;
+  static inline value_type convert_func(const string_type::value_type* str,
+                                        string_type::value_type** endptr) {
+    return static_cast<value_type>(wcstoul(str, endptr, kBase));
+  }
+  static inline bool valid_func(const string_type& str) {
+    return !iswspace(str[0]);
+  }
+};
+
 }  // namespace
 
 
@@ -1019,3 +1160,114 @@ bool MatchPattern(const std::wstring& eval, const std::wstring& pattern) {
 bool MatchPattern(const std::string& eval, const std::string& pattern) {
   return MatchPatternT(eval.c_str(), pattern.c_str());
 }
+
+// For the various *ToInt conversions, there are no *ToIntTraits classes to use
+// because there's no such thing as strtoi. Parse with *ToLongTraits into a
+// long and narrow the result to int explicitly. Writing the long through a
+// cast int* is only safe where long and int have the same width, which is not
+// the case on LP64 platforms.
+
+namespace {
+
+// Narrows a decimal |value| to int. Values outside the range of int are
+// clamped to INT_MAX or INT_MIN and false is returned, which is what strtol
+// itself reports for overflow/underflow when long and int have the same width.
+bool ClampLongToInt(long value, int* output) {
+  if (value > INT_MAX) {
+    *output = INT_MAX;
+    return false;
+  }
+  if (value < INT_MIN) {
+    *output = INT_MIN;
+    return false;
+  }
+  *output = static_cast<int>(value);
+  return true;
+}
+
+// Narrows |value|, as produced by the strtoul-based Hex*ToLongTraits, to int.
+// Any magnitude that fits in an unsigned int is accepted and reinterpreted as
+// an int bit pattern, so "ffffffff" yields -1 and "-42" yields -66 whatever
+// the width of long. Larger magnitudes set |*output| to -1, matching what a
+// saturated strtoul result converts to when long and int have the same width,
+// and return false.
+bool WrapHexLongToInt(long value, int* output) {
+  const unsigned long bits = static_cast<unsigned long>(value);
+  const unsigned long magnitude = value < 0 ? 0UL - bits : bits;
+  if (magnitude > UINT_MAX) {
+    *output = -1;
+    return false;
+  }
+  *output = static_cast<int>(static_cast<unsigned int>(bits));
+  return true;
+}
+
+}  // namespace
+
+bool StringToInt(const std::string& input, int* output) {
+  long value = 0;
+  const bool parsed = StringToNumber<StringToLongTraits>(input, &value);
+  return ClampLongToInt(value, output) && parsed;
+}
+
+bool StringToInt(const std::wstring& input, int* output) {
+  long value = 0;
+  const bool parsed = StringToNumber<WStringToLongTraits>(input, &value);
+  return ClampLongToInt(value, output) && parsed;
+}
+
+bool StringToInt64(const std::string& input, int64* output) {
+  return StringToNumber<StringToInt64Traits>(input, output);
+}
+
+bool StringToInt64(const std::wstring& input, int64* output) {
+  return StringToNumber<WStringToInt64Traits>(input, output);
+}
+
+bool HexStringToInt(const std::string& input, int* output) {
+  long value = 0;
+  const bool parsed = StringToNumber<HexStringToLongTraits>(input, &value);
+  return WrapHexLongToInt(value, output) && parsed;
+}
+
+bool HexStringToInt(const std::wstring& input, int* output) {
+  long value = 0;
+  const bool parsed = StringToNumber<HexWStringToLongTraits>(input, &value);
+  return WrapHexLongToInt(value, output) && parsed;
+}
+
+int StringToInt(const std::string& value) {
+  int result = 0;
+  StringToInt(value, &result);
+  return result;
+}
+
+int StringToInt(const std::wstring& value) {
+  int result = 0;
+  StringToInt(value, &result);
+  return result;
+}
+
+int64 StringToInt64(const std::string& value) {
+  int64 result = 0;
+  StringToInt64(value, &result);
+  return result;
+}
+
+int64 StringToInt64(const std::wstring& value) {
+  int64 result = 0;
+  StringToInt64(value, &result);
+  return result;
+}
+
+int HexStringToInt(const std::string& value) {
+  int result = 0;
+  HexStringToInt(value, &result);
+  return result;
+}
+
+int HexStringToInt(const std::wstring& value) {
+  int result = 0;
+  HexStringToInt(value, &result);
+  return result;
+}
diff --git a/base/string_util.h b/base/string_util.h
index e5fd147..d47d5f2 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -328,8 +328,33 @@ std::string IntToString(int value);
 std::string Int64ToString(int64
value);
 std::wstring Int64ToWString(int64 value);
 std::wstring IntToWString(int value);
+
+// Perform a best-effort conversion of the input string to a numeric type,
+// setting |*output| to the result of the conversion. Returns true for
+// "perfect" conversions; returns false in the following cases:
+// - Overflow/underflow. |*output| will be set to the maximum (overflow) or
+//   minimum (underflow) value supported by the data type.
+// - Trailing characters in the string after parsing the number, or leading
+//   whitespace before it. |*output| will be set to the number parsed.
+// - No characters parseable as a number at the beginning of the string.
+//   |*output| will be set to 0.
+// - Empty string. |*output| will be set to 0.
+bool StringToInt(const std::string& input, int* output);
+bool StringToInt(const std::wstring& input, int* output);
+bool StringToInt64(const std::string& input, int64* output);
+bool StringToInt64(const std::wstring& input, int64* output);
+bool HexStringToInt(const std::string& input, int* output);
+bool HexStringToInt(const std::wstring& input, int* output);
+
+// Convenience forms of the above, when the caller is uninterested in the
+// boolean return value. These return only the |*output| value from the
+// above conversions: a best-effort conversion when possible, otherwise, 0.
+int StringToInt(const std::string& value);
+int StringToInt(const std::wstring& value);
 int64 StringToInt64(const std::string& value);
 int64 StringToInt64(const std::wstring& value);
+int HexStringToInt(const std::string& value);
+int HexStringToInt(const std::wstring& value);
 
 // Return a C++ string given printf-like input.
std::string StringPrintf(const char* format, ...);
diff --git a/base/string_util_mac.cc b/base/string_util_mac.cc
index 4c5f3dc..5079da1 100644
--- a/base/string_util_mac.cc
+++ b/base/string_util_mac.cc
@@ -229,11 +229,3 @@ NumberFormat* NumberFormatSingleton() {
   InitializeStatics();
   return number_format_singleton;
 }
-
-int64 StringToInt64(const std::string& value) {
-  return atoll(value.c_str());
-}
-
-int64 StringToInt64(const std::wstring& value) {
-  return wcstoll(value.c_str(), NULL, 10);
-}
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index c6ff622..03df6de 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -401,6 +401,16 @@ TEST(StringUtilTest, ConvertASCII) {
   std::string empty;
   EXPECT_EQ(empty, WideToASCII(wempty));
   EXPECT_EQ(wempty, ASCIIToWide(empty));
+
+  // Convert strings with an embedded NUL character.
+  const char chars_with_nul[] = "test\0string";
+  const size_t length_with_nul = arraysize(chars_with_nul) - 1;
+  std::string string_with_nul(chars_with_nul, length_with_nul);
+  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
+  EXPECT_EQ(length_with_nul, wide_with_nul.length());
+  std::string narrow_with_nul = WideToASCII(wide_with_nul);
+  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
+  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 }
 
 static const struct {
@@ -537,6 +547,178 @@ TEST(StringUtilTest, Uint64ToString) {
     EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
 }
 
+TEST(StringUtilTest, StringToInt) {
+  static const struct {
+    std::string input;
+    int output;
+    bool success;
+  } cases[] = {
+    {"0", 0, true},
+    {"42", 42, true},
+    {"-2147483648", INT_MIN, true},
+    {"2147483647", INT_MAX, true},
+    {"", 0, false},
+    {" 42", 42, false},
+    {"\t\n\v\f\r 42", 42, false},
+    {"blah42", 0, false},
+    {"42blah", 42, false},
+    {"blah42blah", 0, false},
+    {"-273.15", -273, false},
+    {"+98.6", 98, false},
+    {"--123", 0, false},
+    {"++123", 0, false},
+    {"-+123", 0, false},
+    {"+-123", 0, false},
+    {"-", 0, false},
+    {"-2147483649", INT_MIN, false},
+    {"-99999999999", INT_MIN, false},
+    {"2147483648", INT_MAX, false},
+    {"99999999999", INT_MAX, false},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); ++i) {
+    EXPECT_EQ(cases[i].output, StringToInt(cases[i].input));
+    int output;
+    EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output));
+    EXPECT_EQ(cases[i].output, output);
+
+    std::wstring wide_input = ASCIIToWide(cases[i].input);
+    EXPECT_EQ(cases[i].output, StringToInt(wide_input));
+    EXPECT_EQ(cases[i].success, StringToInt(wide_input, &output));
+    EXPECT_EQ(cases[i].output, output);
+  }
+
+  // One additional test to verify the conversion of numbers in strings with
+  // embedded NUL characters. The NUL and extra data after it should be
+  // interpreted as junk after the number.
+  const char input[] = "6\06";
+  std::string input_string(input, arraysize(input) - 1);
+  int output;
+  EXPECT_FALSE(StringToInt(input_string, &output));
+  EXPECT_EQ(6, output);
+
+  std::wstring wide_input = ASCIIToWide(input_string);
+  EXPECT_FALSE(StringToInt(wide_input, &output));
+  EXPECT_EQ(6, output);
+}
+
+TEST(StringUtilTest, StringToInt64) {
+  static const struct {
+    std::string input;
+    int64 output;
+    bool success;
+  } cases[] = {
+    {"0", 0, true},
+    {"42", 42, true},
+    {"-2147483648", INT_MIN, true},
+    {"2147483647", INT_MAX, true},
+    {"-2147483649", GG_INT64_C(-2147483649), true},
+    {"-99999999999", GG_INT64_C(-99999999999), true},
+    {"2147483648", GG_INT64_C(2147483648), true},
+    {"99999999999", GG_INT64_C(99999999999), true},
+    {"9223372036854775807", kint64max, true},
+    {"-9223372036854775808", kint64min, true},
+    {"", 0, false},
+    {" 42", 42, false},
+    {"\t\n\v\f\r 42", 42, false},
+    {"blah42", 0, false},
+    {"42blah", 42, false},
+    {"blah42blah", 0, false},
+    {"-273.15", -273, false},
+    {"+98.6", 98, false},
+    {"--123", 0, false},
+    {"++123", 0, false},
+    {"-+123", 0, false},
+    {"+-123", 0, false},
+    {"-", 0, false},
+    {"-9223372036854775809", kint64min, false},
+    {"-99999999999999999999", kint64min, false},
+    {"9223372036854775808", kint64max, false},
+    {"99999999999999999999", kint64max, false},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); ++i) {
+    EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input));
+    int64 output;
+    EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output));
+    EXPECT_EQ(cases[i].output, output);
+
+    std::wstring wide_input = ASCIIToWide(cases[i].input);
+    EXPECT_EQ(cases[i].output, StringToInt64(wide_input));
+    EXPECT_EQ(cases[i].success, StringToInt64(wide_input, &output));
+    EXPECT_EQ(cases[i].output, output);
+  }
+
+  // One additional test to verify the conversion of numbers in strings with
+  // embedded NUL characters. The NUL and extra data after it should be
+  // interpreted as junk after the number.
+  const char input[] = "6\06";
+  std::string input_string(input, arraysize(input) - 1);
+  int64 output;
+  EXPECT_FALSE(StringToInt64(input_string, &output));
+  EXPECT_EQ(6, output);
+
+  std::wstring wide_input = ASCIIToWide(input_string);
+  EXPECT_FALSE(StringToInt64(wide_input, &output));
+  EXPECT_EQ(6, output);
+}
+
+TEST(StringUtilTest, HexStringToInt) {
+  static const struct {
+    std::string input;
+    int output;
+    bool success;
+  } cases[] = {
+    {"0", 0, true},
+    {"42", 66, true},
+    {"-42", -66, true},
+    {"+42", 66, true},
+    {"7fffffff", INT_MAX, true},
+    {"80000000", INT_MIN, true},
+    {"ffffffff", -1, true},
+    {"DeadBeef", 0xdeadbeef, true},
+    {"0x42", 66, true},
+    {"-0x42", -66, true},
+    {"+0x42", 66, true},
+    {"0x7fffffff", INT_MAX, true},
+    {"0x80000000", INT_MIN, true},
+    {"0xffffffff", -1, true},
+    {"0XDeadBeef", 0xdeadbeef, true},
+    {" 45", 0x45, false},
+    {"\t\n\v\f\r 0x45", 0x45, false},
+    {"efgh", 0xef, false},
+    {"0xefgh", 0xef, false},
+    {"hgfe", 0, false},
+    {"100000000", -1, false},  // don't care about |output|, just |success|
+    {"-", 0, false},
+    {"", 0, false},
+  };
+
+  for (size_t i = 0; i < arraysize(cases); ++i) {
+    EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input));
+    int output;
+    EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output));
+    EXPECT_EQ(cases[i].output, output);
+
+    std::wstring wide_input = ASCIIToWide(cases[i].input);
+    EXPECT_EQ(cases[i].output, HexStringToInt(wide_input));
+    EXPECT_EQ(cases[i].success, HexStringToInt(wide_input, &output));
+    EXPECT_EQ(cases[i].output, output);
+  }
+  // One additional test to verify the conversion of numbers in strings with
+  // embedded NUL characters. The NUL and extra data after it should be
+  // interpreted as junk after the number.
+  const char input[] = "0xc0ffee\09";
+  std::string input_string(input, arraysize(input) - 1);
+  int output;
+  EXPECT_FALSE(HexStringToInt(input_string, &output));
+  EXPECT_EQ(0xc0ffee, output);
+
+  std::wstring wide_input = ASCIIToWide(input_string);
+  EXPECT_FALSE(HexStringToInt(wide_input, &output));
+  EXPECT_EQ(0xc0ffee, output);
+}
+
 // This checks where we can use the assignment operator for a va_list. We need
 // a way to do this since Visual C doesn't support va_copy, but assignment on
 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
diff --git a/base/string_util_win.cc b/base/string_util_win.cc
index 6cad854..fac24a7 100644
--- a/base/string_util_win.cc
+++ b/base/string_util_win.cc
@@ -114,11 +114,3 @@ NumberFormat* NumberFormatSingleton() {
   }
   return number_format;
 }
-
-int64 StringToInt64(const std::string& value) {
-  return _atoi64(value.c_str());
-}
-
-int64 StringToInt64(const std::wstring& value) {
-  return _wtoi64(value.c_str());
-}
|