diff options
Diffstat (limited to 'third_party/libphonenumber/cpp/src/utf/unilib.h')
-rw-r--r-- | third_party/libphonenumber/cpp/src/utf/unilib.h | 95 |
1 files changed, 0 insertions, 95 deletions
diff --git a/third_party/libphonenumber/cpp/src/utf/unilib.h b/third_party/libphonenumber/cpp/src/utf/unilib.h deleted file mode 100644 index 4cfc787..0000000 --- a/third_party/libphonenumber/cpp/src/utf/unilib.h +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Copyright 2010 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Routines to do manipulation of Unicode characters or text -// -// The StructurallyValid routines accept buffers of arbitrary bytes. -// For CoerceToStructurallyValid(), the input buffer and output buffers may -// point to exactly the same memory. -// -// In all other cases, the UTF-8 string must be structurally valid and -// have all codepoints in the range U+0000 to U+D7FF or U+E000 to U+10FFFF. -// Debug builds take a fatal error for invalid UTF-8 input. -// The input and output buffers may not overlap at all. -// -// The char32 routines are here only for convenience; they convert to UTF-8 -// internally and use the UTF-8 routines. - -#ifndef UTIL_UTF8_UNILIB_H__ -#define UTIL_UTF8_UNILIB_H__ - -#include <string> -#include "base/basictypes.h" - -namespace UniLib { - -// Returns true unless a surrogate code point -inline bool IsValidCodepoint(char32 c) { - // In the range [0, 0xD800) or [0xE000, 0x10FFFF] - return (static_cast<uint32>(c) < 0xD800) - || (c >= 0xE000 && c <= 0x10FFFF); -} - -// Table of UTF-8 character lengths, based on first byte -static const unsigned char kUTF8LenTbl[256] = { - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4 -}; - -// Return length of a single UTF-8 source character -inline int OneCharLen(const char* src) { - return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)]; -} - -// Return length of a single UTF-8 source character -inline int OneCharLen(const uint8* src) { - return kUTF8LenTbl[*src]; -} - -// Return true if this byte is a trailing UTF-8 byte (10xx xxxx) -inline bool IsTrailByte(char x) { - // return (x & 0xC0) == 0x80; - // Since trail bytes are always in [0x80, 0xBF], we can optimize: - return static_cast<signed char>(x) < -0x40; -} - -// Returns the length in bytes of the prefix of src that is all -// interchange valid UTF-8 -int SpanInterchangeValid(const char* src, int byte_length); -inline int SpanInterchangeValid(const std::string& src) { - return SpanInterchangeValid(src.data(), src.size()); -} - -// Returns true if the source is all interchange valid UTF-8 -// "Interchange valid" is a stronger than structurally valid -- -// no C0 or C1 control codes (other than CR LF HT FF) and no non-characters. -inline bool IsInterchangeValid(const char* src, int byte_length) { - return (byte_length == SpanInterchangeValid(src, byte_length)); -} -inline bool IsInterchangeValid(const std::string& src) { - return IsInterchangeValid(src.data(), src.size()); -} - -} // namespace UniLib - -#endif // UTIL_UTF8_PUBLIC_UNILIB_H_ |