summaryrefslogtreecommitdiffstats
path: root/third_party/libphonenumber/cpp/src/utf
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libphonenumber/cpp/src/utf')
-rw-r--r--third_party/libphonenumber/cpp/src/utf/README1
-rw-r--r--third_party/libphonenumber/cpp/src/utf/rune.c350
-rw-r--r--third_party/libphonenumber/cpp/src/utf/stringpiece.h24
-rw-r--r--third_party/libphonenumber/cpp/src/utf/stringprintf.h22
-rw-r--r--third_party/libphonenumber/cpp/src/utf/unicodetext.cc515
-rw-r--r--third_party/libphonenumber/cpp/src/utf/unicodetext.h456
-rw-r--r--third_party/libphonenumber/cpp/src/utf/unilib.cc64
-rw-r--r--third_party/libphonenumber/cpp/src/utf/unilib.h95
-rw-r--r--third_party/libphonenumber/cpp/src/utf/utf.h251
-rw-r--r--third_party/libphonenumber/cpp/src/utf/utfdef.h28
10 files changed, 0 insertions, 1806 deletions
diff --git a/third_party/libphonenumber/cpp/src/utf/README b/third_party/libphonenumber/cpp/src/utf/README
deleted file mode 100644
index 986e9e3..0000000
--- a/third_party/libphonenumber/cpp/src/utf/README
+++ /dev/null
@@ -1 +0,0 @@
-These files come from lib9 (http://code.google.com/p/go/source/browse).
diff --git a/third_party/libphonenumber/cpp/src/utf/rune.c b/third_party/libphonenumber/cpp/src/utf/rune.c
deleted file mode 100644
index 5a37368..0000000
--- a/third_party/libphonenumber/cpp/src/utf/rune.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * The authors of this software are Rob Pike and Ken Thompson.
- * Copyright (c) 2002 by Lucent Technologies.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
- * is included in all copies of any software which is or includes a copy
- * or modification of this software and in all copies of the supporting
- * documentation for such software.
- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */
-#include <stdarg.h>
-#include <string.h>
-#include "utf.h"
-#include "utfdef.h"
-
-enum
-{
- Bit1 = 7,
- Bitx = 6,
- Bit2 = 5,
- Bit3 = 4,
- Bit4 = 3,
- Bit5 = 2,
-
- T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
- Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
- T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
- T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
- T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
- T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
-
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
- Rune4 = (1<<(Bit4+3*Bitx))-1,
- /* 0001 1111 1111 1111 1111 1111 */
-
- Maskx = (1<<Bitx)-1, /* 0011 1111 */
- Testx = Maskx ^ 0xFF, /* 1100 0000 */
-
- Bad = Runeerror,
-};
-
-/*
- * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
- * This is a slower but "safe" version of the old chartorune
- * that works on strings that are not necessarily null-terminated.
- *
- * If you know for sure that your string is null-terminated,
- * chartorune will be a bit faster.
- *
- * It is guaranteed not to attempt to access "length"
- * past the incoming pointer. This is to avoid
- * possible access violations. If the string appears to be
- * well-formed but incomplete (i.e., to get the whole Rune
- * we'd need to read past str+length) then we'll set the Rune
- * to Bad and return 0.
- *
- * Note that if we have decoding problems for other
- * reasons, we return 1 instead of 0.
- */
-int
-charntorune(Rune *rune, const char *str, int length)
-{
- int c, c1, c2, c3;
- long l;
-
- /* When we're not allowed to read anything */
- if(length <= 0) {
- goto badlen;
- }
-
- /*
- * one character sequence (7-bit value)
- * 00000-0007F => T1
- */
- c = *(uchar*)str;
- if(c < Tx) {
- *rune = c;
- return 1;
- }
-
- // If we can't read more than one character we must stop
- if(length <= 1) {
- goto badlen;
- }
-
- /*
- * two character sequence (11-bit value)
- * 0080-07FF => T2 Tx
- */
- c1 = *(uchar*)(str+1) ^ Tx;
- if(c1 & Testx)
- goto bad;
- if(c < T3) {
- if(c < T2)
- goto bad;
- l = ((c << Bitx) | c1) & Rune2;
- if(l <= Rune1)
- goto bad;
- *rune = l;
- return 2;
- }
-
- // If we can't read more than two characters we must stop
- if(length <= 2) {
- goto badlen;
- }
-
- /*
- * three character sequence (16-bit value)
- * 0800-FFFF => T3 Tx Tx
- */
- c2 = *(uchar*)(str+2) ^ Tx;
- if(c2 & Testx)
- goto bad;
- if(c < T4) {
- l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
- if(l <= Rune2)
- goto bad;
- *rune = l;
- return 3;
- }
-
- if (length <= 3)
- goto badlen;
-
- /*
- * four character sequence (21-bit value)
- * 10000-1FFFFF => T4 Tx Tx Tx
- */
- c3 = *(uchar*)(str+3) ^ Tx;
- if (c3 & Testx)
- goto bad;
- if (c < T5) {
- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if (l <= Rune3)
- goto bad;
- *rune = l;
- return 4;
- }
-
- // Support for 5-byte or longer UTF-8 would go here, but
- // since we don't have that, we'll just fall through to bad.
-
- /*
- * bad decoding
- */
-bad:
- *rune = Bad;
- return 1;
-badlen:
- *rune = Bad;
- return 0;
-
-}
-
-
-/*
- * This is the older "unsafe" version, which works fine on
- * null-terminated strings.
- */
-int
-chartorune(Rune *rune, const char *str)
-{
- int c, c1, c2, c3;
- long l;
-
- /*
- * one character sequence
- * 00000-0007F => T1
- */
- c = *(uchar*)str;
- if(c < Tx) {
- *rune = c;
- return 1;
- }
-
- /*
- * two character sequence
- * 0080-07FF => T2 Tx
- */
- c1 = *(uchar*)(str+1) ^ Tx;
- if(c1 & Testx)
- goto bad;
- if(c < T3) {
- if(c < T2)
- goto bad;
- l = ((c << Bitx) | c1) & Rune2;
- if(l <= Rune1)
- goto bad;
- *rune = l;
- return 2;
- }
-
- /*
- * three character sequence
- * 0800-FFFF => T3 Tx Tx
- */
- c2 = *(uchar*)(str+2) ^ Tx;
- if(c2 & Testx)
- goto bad;
- if(c < T4) {
- l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
- if(l <= Rune2)
- goto bad;
- *rune = l;
- return 3;
- }
-
- /*
- * four character sequence (21-bit value)
- * 10000-1FFFFF => T4 Tx Tx Tx
- */
- c3 = *(uchar*)(str+3) ^ Tx;
- if (c3 & Testx)
- goto bad;
- if (c < T5) {
- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if (l <= Rune3)
- goto bad;
- *rune = l;
- return 4;
- }
-
- /*
- * Support for 5-byte or longer UTF-8 would go here, but
- * since we don't have that, we'll just fall through to bad.
- */
-
- /*
- * bad decoding
- */
-bad:
- *rune = Bad;
- return 1;
-}
-
-int
-isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
- *consumed = charntorune(rune, str, length);
- return *rune != Runeerror || *consumed == 3;
-}
-
-int
-runetochar(char *str, const Rune *rune)
-{
- /* Runes are signed, so convert to unsigned for range check. */
- unsigned long c;
-
- /*
- * one character sequence
- * 00000-0007F => 00-7F
- */
- c = *rune;
- if(c <= Rune1) {
- str[0] = c;
- return 1;
- }
-
- /*
- * two character sequence
- * 0080-07FF => T2 Tx
- */
- if(c <= Rune2) {
- str[0] = T2 | (c >> 1*Bitx);
- str[1] = Tx | (c & Maskx);
- return 2;
- }
-
- /*
- * If the Rune is out of range, convert it to the error rune.
- * Do this test here because the error rune encodes to three bytes.
- * Doing it earlier would duplicate work, since an out of range
- * Rune wouldn't have fit in one or two bytes.
- */
- if (c > Runemax)
- c = Runeerror;
-
- /*
- * three character sequence
- * 0800-FFFF => T3 Tx Tx
- */
- if (c <= Rune3) {
- str[0] = T3 | (c >> 2*Bitx);
- str[1] = Tx | ((c >> 1*Bitx) & Maskx);
- str[2] = Tx | (c & Maskx);
- return 3;
- }
-
- /*
- * four character sequence (21-bit value)
- * 10000-1FFFFF => T4 Tx Tx Tx
- */
- str[0] = T4 | (c >> 3*Bitx);
- str[1] = Tx | ((c >> 2*Bitx) & Maskx);
- str[2] = Tx | ((c >> 1*Bitx) & Maskx);
- str[3] = Tx | (c & Maskx);
- return 4;
-}
-
-int
-runelen(Rune rune)
-{
- char str[10];
-
- return runetochar(str, &rune);
-}
-
-int
-runenlen(const Rune *r, int nrune)
-{
- int nb, c;
-
- nb = 0;
- while(nrune--) {
- c = *r++;
- if (c <= Rune1)
- nb++;
- else if (c <= Rune2)
- nb += 2;
- else if (c <= Rune3)
- nb += 3;
- else /* assert(c <= Rune4) */
- nb += 4;
- }
- return nb;
-}
-
-int
-fullrune(const char *str, int n)
-{
- if (n > 0) {
- int c = *(uchar*)str;
- if (c < Tx)
- return 1;
- if (n > 1) {
- if (c < T3)
- return 1;
- if (n > 2) {
- if (c < T4 || n > 3)
- return 1;
- }
- }
- }
- return 0;
-}
diff --git a/third_party/libphonenumber/cpp/src/utf/stringpiece.h b/third_party/libphonenumber/cpp/src/utf/stringpiece.h
deleted file mode 100644
index 7b56772..0000000
--- a/third_party/libphonenumber/cpp/src/utf/stringpiece.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Copyright 2010 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef STRINGS_STRINGPIECE_H_
-#define STRINGS_STRINGPIECE_H_
-
-//#include "third_party/chromium/src/base/string_piece.h"
-#include "base/string_piece.h"
-
-using base::StringPiece;
-
-#endif // STRINGS_STRINGPIECE_H_
diff --git a/third_party/libphonenumber/cpp/src/utf/stringprintf.h b/third_party/libphonenumber/cpp/src/utf/stringprintf.h
deleted file mode 100644
index 208d338f..0000000
--- a/third_party/libphonenumber/cpp/src/utf/stringprintf.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * Copyright 2010 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef STRINGS_STRINGPRINTF_H_
-#define STRINGS_STRINGPRINTF_H_
-
-//#include "third_party/chromium/src/base/string_util.h"
-#include "base/string_util.h"
-
-#endif // STRINGS_STRINGPRINTF_H_
diff --git a/third_party/libphonenumber/cpp/src/utf/unicodetext.cc b/third_party/libphonenumber/cpp/src/utf/unicodetext.cc
deleted file mode 100644
index 82c1b42..0000000
--- a/third_party/libphonenumber/cpp/src/utf/unicodetext.cc
+++ /dev/null
@@ -1,515 +0,0 @@
-// Copyright (C) 2006 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Author: Jim Meehan
-
-#include <iostream>
-#include <sstream>
-#include <cassert>
-
-#include "utf/unicodetext.h"
-//#include "base/logging.h"
-#include "utf/stringpiece.h"
-//#include "utf/stringprintf.h"
-#include "utf/utf.h"
-#include "utf/unilib.h"
-
-using std::stringstream;
-using std::max;
-using std::hex;
-using std::dec;
-using std::cerr;
-using std::endl;
-
-static int CodepointDistance(const char* start, const char* end) {
- int n = 0;
- // Increment n on every non-trail-byte.
- for (const char* p = start; p < end; ++p) {
- n += (*reinterpret_cast<const signed char*>(p) >= -0x40);
- }
- return n;
-}
-
-static int CodepointCount(const char* utf8, int len) {
- return CodepointDistance(utf8, utf8 + len);
-}
-
-UnicodeText::const_iterator::difference_type
-distance(const UnicodeText::const_iterator& first,
- const UnicodeText::const_iterator& last) {
- return CodepointDistance(first.it_, last.it_);
-}
-
-// ---------- Utility ----------
-
-static int ConvertToInterchangeValid(char* start, int len) {
- // This routine is called only when we've discovered that a UTF-8 buffer
- // that was passed to CopyUTF8, TakeOwnershipOfUTF8, or PointToUTF8
- // was not interchange valid. This indicates a bug in the caller, and
- // a LOG(WARNING) is done in that case.
- // This is similar to CoerceToInterchangeValid, but it replaces each
- // structurally valid byte with a space, and each non-interchange
- // character with a space, even when that character requires more
- // than one byte in UTF8. E.g., "\xEF\xB7\x90" (U+FDD0) is
- // structurally valid UTF8, but U+FDD0 is not an interchange-valid
- // code point. The result should contain one space, not three.
- //
- // Since the conversion never needs to write more data than it
- // reads, it is safe to change the buffer in place. It returns the
- // number of bytes written.
- char* const in = start;
- char* out = start;
- char* const end = start + len;
- while (start < end) {
- int good = UniLib::SpanInterchangeValid(start, end - start);
- if (good > 0) {
- if (out != start) {
- memmove(out, start, good);
- }
- out += good;
- start += good;
- if (start == end) {
- break;
- }
- }
- // Is the current string invalid UTF8 or just non-interchange UTF8?
- char32 rune;
- int n;
- if (isvalidcharntorune(start, end - start, &rune, &n)) {
- // structurally valid UTF8, but not interchange valid
- start += n; // Skip over the whole character.
- } else { // bad UTF8
- start += 1; // Skip over just one byte
- }
- *out++ = ' ';
- }
- return out - in;
-}
-
-
-// *************** Data representation **********
-
-// Note: the copy constructor is undefined.
-
-// After reserve(), resize(), or clear(), we're an owner, not an alias.
-
-void UnicodeText::Repr::reserve(int new_capacity) {
- // If there's already enough capacity, and we're an owner, do nothing.
- if (capacity_ >= new_capacity && ours_) return;
-
- // Otherwise, allocate a new buffer.
- capacity_ = max(new_capacity, (3 * capacity_) / 2 + 20);
- char* new_data = new char[capacity_];
-
- // If there is an old buffer, copy it into the new buffer.
- if (data_) {
- memcpy(new_data, data_, size_);
- if (ours_) delete[] data_; // If we owned the old buffer, free it.
- }
- data_ = new_data;
- ours_ = true; // We own the new buffer.
- // size_ is unchanged.
-}
-
-void UnicodeText::Repr::resize(int new_size) {
- if (new_size == 0) {
- clear();
- } else {
- if (!ours_ || new_size > capacity_) reserve(new_size);
- // Clear the memory in the expanded part.
- if (size_ < new_size) memset(data_ + size_, 0, new_size - size_);
- size_ = new_size;
- ours_ = true;
- }
-}
-
-// This implementation of clear() deallocates the buffer if we're an owner.
-// That's not strictly necessary; we could just set size_ to 0.
-void UnicodeText::Repr::clear() {
- if (ours_) delete[] data_;
- data_ = NULL;
- size_ = capacity_ = 0;
- ours_ = true;
-}
-
-void UnicodeText::Repr::Copy(const char* data, int size) {
- resize(size);
- memcpy(data_, data, size);
-}
-
-void UnicodeText::Repr::TakeOwnershipOf(char* data, int size, int capacity) {
- if (data == data_) return; // We already own this memory. (Weird case.)
- if (ours_ && data_) delete[] data_; // If we owned the old buffer, free it.
- data_ = data;
- size_ = size;
- capacity_ = capacity;
- ours_ = true;
-}
-
-void UnicodeText::Repr::PointTo(const char* data, int size) {
- if (ours_ && data_) delete[] data_; // If we owned the old buffer, free it.
- data_ = const_cast<char*>(data);
- size_ = size;
- capacity_ = size;
- ours_ = false;
-}
-
-void UnicodeText::Repr::append(const char* bytes, int byte_length) {
- reserve(size_ + byte_length);
- memcpy(data_ + size_, bytes, byte_length);
- size_ += byte_length;
-}
-
-string UnicodeText::Repr::DebugString() const {
- stringstream ss;
-
- ss << "{Repr " << hex << this << " data=" << data_ << " size=" << dec
- << size_ << " capacity=" << capacity_ << " "
- << (ours_ ? "Owned" : "Alias") << "}";
-
- string result;
- ss >> result;
-
- return result;
-}
-
-
-
-// *************** UnicodeText ******************
-
-// ----- Constructors -----
-
-// Default constructor
-UnicodeText::UnicodeText() {
-}
-
-// Copy constructor
-UnicodeText::UnicodeText(const UnicodeText& src) {
- Copy(src);
-}
-
-// Substring constructor
-UnicodeText::UnicodeText(const UnicodeText::const_iterator& first,
- const UnicodeText::const_iterator& last) {
- assert(first <= last && "Incompatible iterators");
- repr_.append(first.it_, last.it_ - first.it_);
-}
-
-string UnicodeText::UTF8Substring(const const_iterator& first,
- const const_iterator& last) {
- assert(first <= last && "Incompatible iterators");
- return string(first.it_, last.it_ - first.it_);
-}
-
-
-// ----- Copy -----
-
-UnicodeText& UnicodeText::operator=(const UnicodeText& src) {
- if (this != &src) {
- Copy(src);
- }
- return *this;
-}
-
-UnicodeText& UnicodeText::Copy(const UnicodeText& src) {
- repr_.Copy(src.repr_.data_, src.repr_.size_);
- return *this;
-}
-
-UnicodeText& UnicodeText::CopyUTF8(const char* buffer, int byte_length) {
- repr_.Copy(buffer, byte_length);
- if (!UniLib:: IsInterchangeValid(buffer, byte_length)) {
- cerr << "UTF-8 buffer is not interchange-valid." << endl;
- repr_.size_ = ConvertToInterchangeValid(repr_.data_, byte_length);
- }
- return *this;
-}
-
-UnicodeText& UnicodeText::UnsafeCopyUTF8(const char* buffer,
- int byte_length) {
- repr_.Copy(buffer, byte_length);
- return *this;
-}
-
-// ----- TakeOwnershipOf -----
-
-UnicodeText& UnicodeText::TakeOwnershipOfUTF8(char* buffer,
- int byte_length,
- int byte_capacity) {
- repr_.TakeOwnershipOf(buffer, byte_length, byte_capacity);
- if (!UniLib:: IsInterchangeValid(buffer, byte_length)) {
- cerr << "UTF-8 buffer is not interchange-valid." << endl;
- repr_.size_ = ConvertToInterchangeValid(repr_.data_, byte_length);
- }
- return *this;
-}
-
-UnicodeText& UnicodeText::UnsafeTakeOwnershipOfUTF8(char* buffer,
- int byte_length,
- int byte_capacity) {
- repr_.TakeOwnershipOf(buffer, byte_length, byte_capacity);
- return *this;
-}
-
-// ----- PointTo -----
-
-UnicodeText& UnicodeText::PointToUTF8(const char* buffer, int byte_length) {
- if (UniLib:: IsInterchangeValid(buffer, byte_length)) {
- repr_.PointTo(buffer, byte_length);
- } else {
- cerr << "UTF-8 buffer is not interchange-valid." << endl;
- repr_.Copy(buffer, byte_length);
- repr_.size_ = ConvertToInterchangeValid(repr_.data_, byte_length);
- }
- return *this;
-}
-
-UnicodeText& UnicodeText::UnsafePointToUTF8(const char* buffer,
- int byte_length) {
- repr_.PointTo(buffer, byte_length);
- return *this;
-}
-
-UnicodeText& UnicodeText::PointTo(const UnicodeText& src) {
- repr_.PointTo(src.repr_.data_, src.repr_.size_);
- return *this;
-}
-
-UnicodeText& UnicodeText::PointTo(const const_iterator &first,
- const const_iterator &last) {
- assert(first <= last && " Incompatible iterators");
- repr_.PointTo(first.utf8_data(), last.utf8_data() - first.utf8_data());
- return *this;
-}
-
-// ----- Append -----
-
-UnicodeText& UnicodeText::append(const UnicodeText& u) {
- repr_.append(u.repr_.data_, u.repr_.size_);
- return *this;
-}
-
-UnicodeText& UnicodeText::append(const const_iterator& first,
- const const_iterator& last) {
- assert(first <= last && "Incompatible iterators");
- repr_.append(first.it_, last.it_ - first.it_);
- return *this;
-}
-
-UnicodeText& UnicodeText::UnsafeAppendUTF8(const char* utf8, int len) {
- repr_.append(utf8, len);
- return *this;
-}
-
-// ----- substring searching -----
-
-UnicodeText::const_iterator UnicodeText::find(const UnicodeText& look,
- const_iterator start_pos) const {
- assert(start_pos.utf8_data() >= utf8_data());
- assert(start_pos.utf8_data() <= utf8_data() + utf8_length());
- return UnsafeFind(look, start_pos);
-}
-
-UnicodeText::const_iterator UnicodeText::find(const UnicodeText& look) const {
- return UnsafeFind(look, begin());
-}
-
-UnicodeText::const_iterator UnicodeText::UnsafeFind(
- const UnicodeText& look, const_iterator start_pos) const {
- // Due to the magic of the UTF8 encoding, searching for a sequence of
- // letters is equivalent to substring search.
- StringPiece searching(utf8_data(), utf8_length());
- StringPiece look_piece(look.utf8_data(), look.utf8_length());
- StringPiece::size_type found =
- searching.find(look_piece, start_pos.utf8_data() - utf8_data());
- if (found == StringPiece::npos) return end();
- return const_iterator(utf8_data() + found);
-}
-
-bool UnicodeText::HasReplacementChar() const {
- // Equivalent to:
- // UnicodeText replacement_char;
- // replacement_char.push_back(0xFFFD);
- // return find(replacement_char) != end();
- StringPiece searching(utf8_data(), utf8_length());
- StringPiece looking_for("\xEF\xBF\xBD", 3);
- return searching.find(looking_for) != StringPiece::npos;
-}
-
-// ----- other methods -----
-
-// Clear operator
-void UnicodeText::clear() {
- repr_.clear();
-}
-
-// Destructor
-UnicodeText::~UnicodeText() {}
-
-
-void UnicodeText::push_back(char32 c) {
- if (UniLib::IsValidCodepoint(c)) {
- char buf[UTFmax];
- int len = runetochar(buf, &c);
- if (UniLib::IsInterchangeValid(buf, len)) {
- repr_.append(buf, len);
- } else {
- cerr << "Unicode value 0x" << hex << c
- << " is not valid for interchange" << endl;
- repr_.append(" ", 1);
- }
- } else {
- cerr << "Illegal Unicode value: 0x" << hex << c << endl;
- repr_.append(" ", 1);
- }
-}
-
-int UnicodeText::size() const {
- return CodepointCount(repr_.data_, repr_.size_);
-}
-
-bool operator==(const UnicodeText& lhs, const UnicodeText& rhs) {
- if (&lhs == &rhs) return true;
- if (lhs.repr_.size_ != rhs.repr_.size_) return false;
- return memcmp(lhs.repr_.data_, rhs.repr_.data_, lhs.repr_.size_) == 0;
-}
-
-string UnicodeText::DebugString() const {
- stringstream ss;
-
- ss << "{UnicodeText " << hex << this << dec << " chars="
- << size() << " repr=" << repr_.DebugString() << "}";
-#if 0
- return StringPrintf("{UnicodeText %p chars=%d repr=%s}",
- this,
- size(),
- repr_.DebugString().c_str());
-#endif
- string result;
- ss >> result;
-
- return result;
-}
-
-
-// ******************* UnicodeText::const_iterator *********************
-
-// The implementation of const_iterator would be nicer if it
-// inherited from boost::iterator_facade
-// (http://boost.org/libs/iterator/doc/iterator_facade.html).
-
-UnicodeText::const_iterator::const_iterator() : it_(0) {}
-
-UnicodeText::const_iterator::const_iterator(const const_iterator& other)
- : it_(other.it_) {
-}
-
-UnicodeText::const_iterator&
-UnicodeText::const_iterator::operator=(const const_iterator& other) {
- if (&other != this)
- it_ = other.it_;
- return *this;
-}
-
-UnicodeText::const_iterator UnicodeText::begin() const {
- return const_iterator(repr_.data_);
-}
-
-UnicodeText::const_iterator UnicodeText::end() const {
- return const_iterator(repr_.data_ + repr_.size_);
-}
-
-bool operator<(const UnicodeText::const_iterator& lhs,
- const UnicodeText::const_iterator& rhs) {
- return lhs.it_ < rhs.it_;
-}
-
-char32 UnicodeText::const_iterator::operator*() const {
- // (We could call chartorune here, but that does some
- // error-checking, and we're guaranteed that our data is valid
- // UTF-8. Also, we expect this routine to be called very often. So
- // for speed, we do the calculation ourselves.)
-
- // Convert from UTF-8
- int byte1 = it_[0];
- if (byte1 < 0x80)
- return byte1;
-
- int byte2 = it_[1];
- if (byte1 < 0xE0)
- return ((byte1 & 0x1F) << 6)
- | (byte2 & 0x3F);
-
- int byte3 = it_[2];
- if (byte1 < 0xF0)
- return ((byte1 & 0x0F) << 12)
- | ((byte2 & 0x3F) << 6)
- | (byte3 & 0x3F);
-
- int byte4 = it_[3];
- return ((byte1 & 0x07) << 18)
- | ((byte2 & 0x3F) << 12)
- | ((byte3 & 0x3F) << 6)
- | (byte4 & 0x3F);
-}
-
-UnicodeText::const_iterator& UnicodeText::const_iterator::operator++() {
- it_ += UniLib::OneCharLen(it_);
- return *this;
-}
-
-UnicodeText::const_iterator& UnicodeText::const_iterator::operator--() {
- while (UniLib::IsTrailByte(*--it_));
- return *this;
-}
-
-int UnicodeText::const_iterator::get_utf8(char* utf8_output) const {
- utf8_output[0] = it_[0];
- if (static_cast<unsigned char>(it_[0]) < 0x80)
- return 1;
-
- utf8_output[1] = it_[1];
- if (static_cast<unsigned char>(it_[0]) < 0xE0)
- return 2;
-
- utf8_output[2] = it_[2];
- if (static_cast<unsigned char>(it_[0]) < 0xF0)
- return 3;
-
- utf8_output[3] = it_[3];
- return 4;
-}
-
-
-UnicodeText::const_iterator UnicodeText::MakeIterator(const char* p) const {
- assert(p != NULL);
- const char* start = utf8_data();
- int len = utf8_length();
- const char* end = start + len;
- assert(p >= start);
- assert(p <= end);
- assert(p == end || !UniLib::IsTrailByte(*p));
- return const_iterator(p);
-}
-
-string UnicodeText::const_iterator::DebugString() const {
- stringstream ss;
-
- ss << "{iter " << hex << it_ << "}";
- string result;
- ss >> result;
-
- return result;
-}
-
diff --git a/third_party/libphonenumber/cpp/src/utf/unicodetext.h b/third_party/libphonenumber/cpp/src/utf/unicodetext.h
deleted file mode 100644
index fb37a33..0000000
--- a/third_party/libphonenumber/cpp/src/utf/unicodetext.h
+++ /dev/null
@@ -1,456 +0,0 @@
-// Copyright (C) 2006 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Author: Jim Meehan
-
-#ifndef UTIL_UTF8_UNICODETEXT_H__
-#define UTIL_UTF8_UNICODETEXT_H__
-
-#include <iterator>
-#include <string>
-#include <utility>
-#include "base/basictypes.h"
-//#include "util/utf8/public/config.h"
-
-using std::string;
-using std::bidirectional_iterator_tag;
-using std::pair;
-
-// ***************************** UnicodeText **************************
-//
-// A UnicodeText object is a container for a sequence of Unicode
-// codepoint values. It has default, copy, and assignment constructors.
-// Data can be appended to it from another UnicodeText, from
-// iterators, or from a single codepoint.
-//
-// The internal representation of the text is UTF-8. Since UTF-8 is a
-// variable-width format, UnicodeText does not provide random access
-// to the text, and changes to the text are permitted only at the end.
-//
-// The UnicodeText class defines a const_iterator. The dereferencing
-// operator (*) returns a codepoint (char32). The iterator is a
-// bidirectional, read-only iterator. It becomes invalid if the text
-// is changed.
-//
-// There are methods for appending and retrieving UTF-8 data directly.
-// The 'utf8_data' method returns a const char* that contains the
-// UTF-8-encoded version of the text; 'utf8_length' returns the number
-// of bytes in the UTF-8 data. An iterator's 'get_utf8' method stores up
-// to 4 bytes of UTF-8 data in a char array and returns the number of
-// bytes that it stored.
-//
-// Codepoints are integers in the range [0, 0xD7FF] or [0xE000,
-// 0x10FFFF], but UnicodeText has the additional restriction that it
-// can contain only those characters that are valid for interchange on
-// the Web. This excludes all of the control codes except for carriage
-// return, line feed, horizontal tab, and form feed. It also excludes
-// non-characters, but codepoints that are in the Private Use regions
-// are allowed, as are codepoints that are unassigned. (See the
-// Unicode reference for details.) The function UniLib::IsInterchangeValid
-// can be used as a test for this property.
-//
-// UnicodeTexts are safe. Every method that constructs or modifies a
-// UnicodeText tests for interchange-validity, and will substitute a
-// space for the invalid data. Such cases are reported via
-// LOG(WARNING).
-//
-// MEMORY MANAGEMENT: copy, take ownership, or point to
-//
-// A UnicodeText is either an "owner", meaning that it owns the memory
-// for the data buffer and will free it when the UnicodeText is
-// destroyed, or it is an "alias", meaning that it does not.
-//
-// There are three methods for storing UTF-8 data in a UnicodeText:
-//
-// CopyUTF8(buffer, len) copies buffer.
-//
-// TakeOwnershipOfUTF8(buffer, size, capacity) takes ownership of buffer.
-//
-// PointToUTF8(buffer, size) creates an alias pointing to buffer.
-//
-// All three methods perform a validity check on the buffer. There are
-// private, "unsafe" versions of these functions that bypass the
-// validity check. They are used internally and by friend-functions
-// that are handling UTF-8 data that has already been validated.
-//
-// The purpose of an alias is to avoid making an unnecessary copy of a
-// UTF-8 buffer while still providing access to the Unicode values
-// within that text through iterators or the fast scanners that are
-// based on UTF-8 state tables. The lifetime of an alias must not
-// exceed the lifetime of the buffer from which it was constructed.
-//
-// The semantics of an alias might be described as "copy on write or
-// repair." The source data is never modified. If push_back() or
-// append() is called on an alias, a copy of the data will be created,
-// and the UnicodeText will become an owner. If clear() is called on
-// an alias, it becomes an (empty) owner.
-//
-// The copy constructor and the assignment operator produce an owner.
-// That is, after direct initialization ("UnicodeText x(y);") or copy
-// initialization ("UnicodeText x = y;") x will be an owner, even if y
-// was an alias. The assignment operator ("x = y;") also produces an
-// owner unless x and y are the same object and y is an alias.
-//
-// Aliases should be used with care. If the source from which an alias
-// was created is freed, or if the contents are changed, while the
-// alias is still in use, fatal errors could result. But it can be
-// quite useful to have a UnicodeText "window" through which to see a
-// UTF-8 buffer without having to pay the price of making a copy.
-//
-// UTILITIES
-//
-// The interfaces in util/utf8/public/textutils.h provide higher-level
-// utilities for dealing with UnicodeTexts, including routines for
-// creating UnicodeTexts (both owners and aliases) from UTF-8 buffers or
-// strings, creating strings from UnicodeTexts, normalizing text for
-// efficient matching or display, and others.
-
-class UnicodeText {
- public:
-  class const_iterator;
-
-  typedef char32 value_type;
-
-  // Constructors. These always produce owners.
-  UnicodeText();  // Create an empty text.
-  UnicodeText(const UnicodeText& src);  // copy constructor
-  // Construct a substring (copies the data).
-  UnicodeText(const const_iterator& first, const const_iterator& last);
-
-  // Assignment operator. This copies the data and produces an owner
-  // unless this == &src, e.g., "x = x;", which is a no-op.
-  UnicodeText& operator=(const UnicodeText& src);
-
-  // x.Copy(y) copies the data from y into x.
-  UnicodeText& Copy(const UnicodeText& src);
-  inline UnicodeText& assign(const UnicodeText& src) { return Copy(src); }
-
-  // x.PointTo(y) changes x so that it points to y's data.
-  // It does not copy y or take ownership of y's data.
-  UnicodeText& PointTo(const UnicodeText& src);
-  UnicodeText& PointTo(const const_iterator& first,
-                       const const_iterator& last);
-
-  ~UnicodeText();
-
-  void clear();  // Clear text.
-  // Test if text is empty. Declared const so that it can be called on a
-  // const UnicodeText, like size() and utf8_length().
-  bool empty() const { return repr_.size_ == 0; }
-
-  // Add a codepoint to the end of the text.
-  // If the codepoint is not interchange-valid, add a space instead
-  // and log a warning.
-  void push_back(char32 codepoint);
-
-  // Generic appending operation.
-  // iterator_traits<ForwardIterator>::value_type must be implicitly
-  // convertible to char32. Typical uses of this method might include:
-  //     char32 chars[] = {0x1, 0x2, ...};
-  //     vector<char32> more_chars = ...;
-  //     utext.append(chars, chars+arraysize(chars));
-  //     utext.append(more_chars.begin(), more_chars.end());
-  template<typename ForwardIterator>
-  UnicodeText& append(ForwardIterator first, const ForwardIterator last) {
-    while (first != last) { push_back(*first++); }
-    return *this;
-  }
-
-  // A specialization of the generic append() method.
-  UnicodeText& append(const const_iterator& first, const const_iterator& last);
-
-  // An optimization of append(source.begin(), source.end()).
-  UnicodeText& append(const UnicodeText& source);
-
-  int size() const;  // the number of Unicode characters (codepoints)
-
-  friend bool operator==(const UnicodeText& lhs, const UnicodeText& rhs);
-  friend bool operator!=(const UnicodeText& lhs, const UnicodeText& rhs);
-
-  class const_iterator {
-    typedef const_iterator CI;
-   public:
-    typedef bidirectional_iterator_tag iterator_category;
-    typedef char32 value_type;
-    typedef ptrdiff_t difference_type;
-    typedef void pointer;  // (Not needed.)
-    typedef const char32 reference;  // (Needed for const_reverse_iterator)
-
-    // Iterators are default-constructible.
-    const_iterator();
-
-    // It's safe to make multiple passes over a UnicodeText.
-    const_iterator(const const_iterator& other);
-    const_iterator& operator=(const const_iterator& other);
-
-    char32 operator*() const;  // Dereference
-
-    const_iterator& operator++();  // Advance (++iter)
-    const_iterator operator++(int) {  // (iter++)
-      const_iterator result(*this);
-      ++*this;
-      return result;
-    }
-
-    const_iterator& operator--();  // Retreat (--iter)
-    const_iterator operator--(int) {  // (iter--)
-      const_iterator result(*this);
-      --*this;
-      return result;
-    }
-
-    // We love relational operators.
-    friend bool operator==(const CI& lhs, const CI& rhs) {
-      return lhs.it_ == rhs.it_; }
-    friend bool operator!=(const CI& lhs, const CI& rhs) {
-      return !(lhs == rhs); }
-    friend bool operator<(const CI& lhs, const CI& rhs);
-    friend bool operator>(const CI& lhs, const CI& rhs) {
-      return rhs < lhs; }
-    friend bool operator<=(const CI& lhs, const CI& rhs) {
-      return !(rhs < lhs); }
-    friend bool operator>=(const CI& lhs, const CI& rhs) {
-      return !(lhs < rhs); }
-
-    friend difference_type distance(const CI& first, const CI& last);
-
-    // UTF-8-specific methods
-    // Store the UTF-8 encoding of the current codepoint into buf,
-    // which must be at least 4 bytes long. Return the number of
-    // bytes written.
-    int get_utf8(char* buf) const;
-    // Return the iterator's pointer into the UTF-8 data.
-    const char* utf8_data() const { return it_; }
-
-    string DebugString() const;
-
-   private:
-    friend class UnicodeText;
-    friend class UnicodeTextUtils;
-    friend class UTF8StateTableProperty;
-    explicit const_iterator(const char* it) : it_(it) {}
-
-    const char* it_;
-  };
-
-  const_iterator begin() const;
-  const_iterator end() const;
-
-  class const_reverse_iterator : public std::reverse_iterator<const_iterator> {
-   public:
-    const_reverse_iterator(const_iterator it) :
-        std::reverse_iterator<const_iterator>(it) {}
-    // A reverse iterator refers to the element just before base(), so both
-    // accessors below step a copy of base() back by one character first.
-    const char* utf8_data() const {
-      const_iterator tmp_it = base();
-      return (--tmp_it).utf8_data();
-    }
-    int get_utf8(char* buf) const {
-      const_iterator tmp_it = base();
-      return (--tmp_it).get_utf8(buf);
-    }
-  };
-  const_reverse_iterator rbegin() const {
-    return const_reverse_iterator(end());
-  }
-  const_reverse_iterator rend() const {
-    return const_reverse_iterator(begin());
-  }
-
-  // Substring searching. Returns the beginning of the first
-  // occurrence of "look", or end() if not found.
-  const_iterator find(const UnicodeText& look, const_iterator start_pos) const;
-  // Equivalent to find(look, begin())
-  const_iterator find(const UnicodeText& look) const;
-
-  // Returns whether this contains the character U+FFFD. This can
-  // occur, for example, if the input to Encodings::Decode() had byte
-  // sequences that were invalid in the source encoding.
-  bool HasReplacementChar() const;
-
-  // UTF-8-specific methods
-  //
-  // Return the data, length, and capacity of UTF-8-encoded version of
-  // the text. Length and capacity are measured in bytes.
-  const char* utf8_data() const { return repr_.data_; }
-  int utf8_length() const { return repr_.size_; }
-  int utf8_capacity() const { return repr_.capacity_; }
-
-  // Return the UTF-8 data as a string.
-  static string UTF8Substring(const const_iterator& first,
-                              const const_iterator& last);
-
-  // There are three methods for initializing a UnicodeText from UTF-8
-  // data. They vary in details of memory management. In all cases,
-  // the data is tested for interchange-validity. If it is not
-  // interchange-valid, a LOG(WARNING) is issued, and each
-  // structurally invalid byte and each interchange-invalid codepoint
-  // is replaced with a space.
-
-  // x.CopyUTF8(buf, len) copies buf into x.
-  UnicodeText& CopyUTF8(const char* utf8_buffer, int byte_length);
-
-  // x.TakeOwnershipOfUTF8(buf, len, capacity). x takes ownership of
-  // buf. buf is not copied.
-  UnicodeText& TakeOwnershipOfUTF8(char* utf8_buffer,
-                                   int byte_length,
-                                   int byte_capacity);
-
-  // x.PointToUTF8(buf,len) changes x so that it points to buf
-  // ("becomes an alias"). It does not take ownership or copy buf.
-  // If the buffer is not valid, this has the same effect as
-  // CopyUTF8(utf8_buffer, byte_length).
-  UnicodeText& PointToUTF8(const char* utf8_buffer, int byte_length);
-
-  // Occasionally it is necessary to use functions that operate on the
-  // pointer returned by utf8_data(). MakeIterator(p) provides a way
-  // to get back to the UnicodeText level. It uses assert to ensure
-  // that p is a pointer within this object's UTF-8 data, and that it
-  // points to the beginning of a character.
-  const_iterator MakeIterator(const char* p) const;
-
-  string DebugString() const;
-
- private:
-  friend class const_iterator;
-  friend class UnicodeTextUtils;
-
-  class Repr {  // A byte-string.
-   public:
-    char* data_;
-    int size_;
-    int capacity_;
-    bool ours_;  // Do we own data_?
-
-    Repr() : data_(NULL), size_(0), capacity_(0), ours_(true) {}
-    ~Repr() { if (ours_) delete[] data_; }
-
-    void clear();
-    void reserve(int capacity);
-    void resize(int size);
-
-    void append(const char* bytes, int byte_length);
-    void Copy(const char* data, int size);
-    void TakeOwnershipOf(char* data, int size, int capacity);
-    void PointTo(const char* data, int size);
-
-    string DebugString() const;
-
-   private:
-    // Declared private so that a Repr cannot be copied or assigned from
-    // outside the class.
-    Repr& operator=(const Repr&);
-    Repr(const Repr& other);
-  };
-
-  Repr repr_;
-
-  // UTF-8-specific private methods.
-  // These routines do not perform a validity check when compiled
-  // in opt mode.
-  // It is an error to call these methods with UTF-8 data that
-  // is not interchange-valid.
-  //
-  UnicodeText& UnsafeCopyUTF8(const char* utf8_buffer, int byte_length);
-  UnicodeText& UnsafeTakeOwnershipOfUTF8(
-      char* utf8_buffer, int byte_length, int byte_capacity);
-  UnicodeText& UnsafePointToUTF8(const char* utf8_buffer, int byte_length);
-  UnicodeText& UnsafeAppendUTF8(const char* utf8_buffer, int byte_length);
-  const_iterator UnsafeFind(const UnicodeText& look,
-                            const_iterator start_pos) const;
-};
-
-bool operator==(const UnicodeText& lhs, const UnicodeText& rhs);
-
-inline bool operator!=(const UnicodeText& lhs, const UnicodeText& rhs) {
-  // Inequality is simply the negation of operator==.
-  const bool same = (lhs == rhs);
-  return !same;
-}
-
-// UnicodeTextRange is a pair of iterators, useful for specifying text
-// segments. If the iterators are ==, the segment is empty.
-typedef pair<UnicodeText::const_iterator,
- UnicodeText::const_iterator> UnicodeTextRange;
-
-inline bool UnicodeTextRangeIsEmpty(const UnicodeTextRange& r) {
-  // A range is empty when its two iterators compare equal.
-  const UnicodeText::const_iterator& range_begin = r.first;
-  const UnicodeText::const_iterator& range_end = r.second;
-  return range_begin == range_end;
-}
-
-
-// *************************** Utilities *************************
-
-// A factory function for creating a UnicodeText from a buffer of
-// UTF-8 data. The new UnicodeText takes ownership of the buffer. (It
-// is an "owner.")
-//
-// Each byte that is structurally invalid will be replaced with a
-// space. Each codepoint that is interchange-invalid will also be
-// replaced with a space, even if the codepoint was represented with a
-// multibyte sequence in the UTF-8 data.
-//
-inline UnicodeText MakeUnicodeTextAcceptingOwnership(
- char* utf8_buffer, int byte_length, int byte_capacity) {
- // TakeOwnershipOfUTF8() is called on a temporary and returns a reference
- // to it; the by-value return then copy-constructs the result from that
- // reference.
- // NOTE(review): this suggests the caller receives a copy of the data while
- // the temporary (which owns utf8_buffer) is destroyed -- confirm against
- // the copy constructor and destructor.
- return UnicodeText().TakeOwnershipOfUTF8(
- utf8_buffer, byte_length, byte_capacity);
-}
-
-// A factory function for creating a UnicodeText from a buffer of
-// UTF-8 data. The new UnicodeText does not take ownership of the
-// buffer. (It is an "alias.")
-//
-inline UnicodeText MakeUnicodeTextWithoutAcceptingOwnership(
- const char* utf8_buffer, int byte_length) {
- // PointToUTF8() is called on a temporary and returns a reference to it;
- // the by-value return then copy-constructs the result from that reference.
- // NOTE(review): the UnicodeText class comments state that the copy
- // constructor always produces an owner, so the returned object may be an
- // owning copy rather than an alias -- confirm against the copy constructor.
- return UnicodeText().PointToUTF8(utf8_buffer, byte_length);
-}
-
-// Create a UnicodeText from a UTF-8 string or buffer.
-//
-// If do_copy is true, then a copy of the string is made. The copy is
-// owned by the resulting UnicodeText object and will be freed when
-// the object is destroyed. This UnicodeText object is referred to
-// as an "owner."
-//
-// If do_copy is false, then no copy is made. The resulting
-// UnicodeText object does NOT take ownership of the string; in this
-// case, the lifetime of the UnicodeText object must not exceed the
-// lifetime of the string. This Unicodetext object is referred to as
-// an "alias." This is the same as MakeUnicodeTextWithoutAcceptingOwnership.
-//
-// If the input string does not contain valid UTF-8, then a copy is
-// made (as if do_copy were true) and coerced to valid UTF-8 by
-// replacing each invalid byte with a space.
-//
-inline UnicodeText UTF8ToUnicodeText(const char* utf8_buf, int len,
-                                     bool do_copy) {
-  // Fill a local UnicodeText either by aliasing the caller's buffer
-  // (do_copy == false) or by copying it (do_copy == true), then return it.
-  UnicodeText text;
-  if (!do_copy) {
-    text.PointToUTF8(utf8_buf, len);
-  } else {
-    text.CopyUTF8(utf8_buf, len);
-  }
-  return text;
-}
-
-// Overload taking a string: forwards the string's bytes and length to the
-// (buffer, length, do_copy) overload.
-inline UnicodeText UTF8ToUnicodeText(const string& utf_string, bool do_copy) {
-  const char* const bytes = utf_string.data();
-  return UTF8ToUnicodeText(bytes, utf_string.size(), do_copy);
-}
-
-// Convenience overloads that always request a copy (do_copy == true).
-inline UnicodeText UTF8ToUnicodeText(const char* utf8_buf, int len) {
-  const bool make_copy = true;
-  return UTF8ToUnicodeText(utf8_buf, len, make_copy);
-}
-inline UnicodeText UTF8ToUnicodeText(const string& utf8_string) {
-  const bool make_copy = true;
-  return UTF8ToUnicodeText(utf8_string, make_copy);
-}
-
-// Return a string containing the UTF-8 encoded version of all the
-// Unicode characters in t.
-inline string UnicodeTextToUTF8(const UnicodeText& t) {
-  // Build the string from the text's UTF-8 byte buffer and its byte length.
-  const char* const bytes = t.utf8_data();
-  const int byte_count = t.utf8_length();
-  return string(bytes, byte_count);
-}
-
-#endif // UTIL_UTF8_UNICODETEXT_H__
diff --git a/third_party/libphonenumber/cpp/src/utf/unilib.cc b/third_party/libphonenumber/cpp/src/utf/unilib.cc
deleted file mode 100644
index 6d90954..0000000
--- a/third_party/libphonenumber/cpp/src/utf/unilib.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Copyright 2010 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Author: Shawn Ligocki
-
-#include "utf/unilib.h"
-
-#include "base/basictypes.h"
-#include "utf/utf.h"
-
-namespace UniLib {
-
-namespace {
-
-// MOE: start_strip
-// MOE: end_strip
-// Returns true if codepoint c is allowed for interchange.
-// Codepoints not allowed for interchange are:
-//   C0 (ASCII) controls: U+0000 to U+001F, excluding
-//                        Horizontal Tab (HT, U+0009), Line-Feed (LF, U+000A),
-//                        Form Feed (FF, U+000C) and Carriage-Return (CR, U+000D)
-//   DEL and C1 controls: U+007F to U+009F
-//   Surrogates:          U+D800 to U+DFFF
-//   Non-characters:      U+FDD0 to U+FDEF and U+xxFFFE to U+xxFFFF for all xx
-inline bool IsInterchangeValidCodepoint(char32 c) {
-  return !((c >= 0x00 && c <= 0x08) || c == 0x0B || (c >= 0x0E && c <= 0x1F) ||
-           (c >= 0x7F && c <= 0x9F) ||
-           (c >= 0xD800 && c <= 0xDFFF) ||
-           (c >= 0xFDD0 && c <= 0xFDEF) || (c&0xFFFE) == 0xFFFE);
-}
-
-}  // namespace
-
-// Returns the length in bytes of the longest prefix of
-// [begin, begin + byte_length) that decodes cleanly and contains only
-// interchange-valid codepoints.
-int SpanInterchangeValid(const char* begin, int byte_length) {
-  char32 rune;
-  const char* p = begin;
-  const char* end = begin + byte_length;
-  while (p < end) {
-    int bytes_consumed = charntorune(&rune, p, end - p);
-    // We want to accept Runeerror == U+FFFD as a valid char, but it is used
-    // by charntorune to indicate error. Luckily, the real codepoint is size 3
-    // while errors return bytes_consumed <= 1: 1 for a malformed sequence and
-    // 0 for a sequence that is truncated by the end of the buffer. The 0 case
-    // must stop the scan too, otherwise p would never advance and the loop
-    // would not terminate.
-    if ((rune == Runeerror && bytes_consumed <= 1) ||
-        !IsInterchangeValidCodepoint(rune)) {
-      break;  // Found
-    }
-    p += bytes_consumed;
-  }
-  return p - begin;
-}
-
-}  // namespace UniLib
diff --git a/third_party/libphonenumber/cpp/src/utf/unilib.h b/third_party/libphonenumber/cpp/src/utf/unilib.h
deleted file mode 100644
index 4cfc787..0000000
--- a/third_party/libphonenumber/cpp/src/utf/unilib.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * Copyright 2010 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Routines to do manipulation of Unicode characters or text
-//
-// The StructurallyValid routines accept buffers of arbitrary bytes.
-// For CoerceToStructurallyValid(), the input buffer and output buffers may
-// point to exactly the same memory.
-//
-// In all other cases, the UTF-8 string must be structurally valid and
-// have all codepoints in the range U+0000 to U+D7FF or U+E000 to U+10FFFF.
-// Debug builds take a fatal error for invalid UTF-8 input.
-// The input and output buffers may not overlap at all.
-//
-// The char32 routines are here only for convenience; they convert to UTF-8
-// internally and use the UTF-8 routines.
-
-#ifndef UTIL_UTF8_UNILIB_H__
-#define UTIL_UTF8_UNILIB_H__
-
-#include <string>
-#include "base/basictypes.h"
-
-namespace UniLib {
-
-// Returns true unless c is a surrogate code point or lies outside the
-// Unicode range, i.e. true for [0, 0xD800) and [0xE000, 0x10FFFF].
-inline bool IsValidCodepoint(char32 c) {
-  // The unsigned comparison also rejects negative values in one test.
-  if (static_cast<uint32>(c) < 0xD800) {
-    return true;
-  }
-  return c >= 0xE000 && c <= 0x10FFFF;
-}
-
-// Table of UTF-8 character lengths, indexed by the first byte.
-static const unsigned char kUTF8LenTbl[256] = {
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
-  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
-  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
-};
-
-// Returns the length in bytes of the UTF-8 character that starts at src,
-// looked up from its first byte.
-inline int OneCharLen(const char* src) {
-  const uint8 lead = *reinterpret_cast<const uint8*>(src);
-  return kUTF8LenTbl[lead];
-}
-
-// Same lookup for callers that already hold unsigned bytes.
-inline int OneCharLen(const uint8* src) {
-  const uint8 lead = *src;
-  return kUTF8LenTbl[lead];
-}
-
-// Returns true if this byte is a trailing UTF-8 byte (10xx xxxx).
-inline bool IsTrailByte(char x) {
-  // Equivalent to (x & 0xC0) == 0x80. Trail bytes are always in
-  // [0x80, 0xBF], which as a signed char is everything below -0x40.
-  const signed char as_signed = static_cast<signed char>(x);
-  return as_signed < -0x40;
-}
-
-// Returns the length in bytes of the prefix of src that is all
-// interchange valid UTF-8.
-int SpanInterchangeValid(const char* src, int byte_length);
-inline int SpanInterchangeValid(const std::string& src) {
-  const char* const bytes = src.data();
-  return SpanInterchangeValid(bytes, src.size());
-}
-
-// Returns true if the source is all interchange valid UTF-8.
-// "Interchange valid" is stronger than structurally valid --
-// no C0 or C1 control codes (other than CR LF HT FF) and no non-characters.
-inline bool IsInterchangeValid(const char* src, int byte_length) {
-  const int valid_prefix = SpanInterchangeValid(src, byte_length);
-  return byte_length == valid_prefix;
-}
-inline bool IsInterchangeValid(const std::string& src) {
-  const char* const bytes = src.data();
-  return IsInterchangeValid(bytes, src.size());
-}
-
-}  // namespace UniLib
-
-#endif // UTIL_UTF8_UNILIB_H__
diff --git a/third_party/libphonenumber/cpp/src/utf/utf.h b/third_party/libphonenumber/cpp/src/utf/utf.h
deleted file mode 100644
index f4fd482..0000000
--- a/third_party/libphonenumber/cpp/src/utf/utf.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * The authors of this software are Rob Pike and Ken Thompson.
- * Copyright (c) 1998-2002 by Lucent Technologies.
- * Portions Copyright (c) 2009 The Go Authors. All rights reserved.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
- * is included in all copies of any software which is or includes a copy
- * or modification of this software and in all copies of the supporting
- * documentation for such software.
- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */
-
-#ifndef _UTFH_
-#define _UTFH_ 1
-
-// stdint.h content doesn't seem to be used in this file and doesn't exist on
-// Windows, therefore we comment it out here so that the code could be compiled
-// on Windows.
-//#include <stdint.h>
-
-typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/
-
-/* Size limits and sentinel values used by the rune/UTF conversion routines. */
-enum
-{
- UTFmax = 4, /* maximum bytes per rune */
- Runesync = 0x80, /* values below this (<) cannot represent part of a UTF sequence */
- Runeself = 0x80, /* for values below this (<), rune and UTF sequences are the same */
- Runeerror = 0xFFFD, /* rune value reported on a decoding error in UTF */
- Runemax = 0x10FFFF, /* maximum rune value */
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * rune routines
- */
-
-/*
- * These routines were written by Rob Pike and Ken Thompson
- * and first appeared in Plan 9.
- * SEE ALSO
- * utf (7)
- * tcs (1)
-*/
-
-// runetochar copies (encodes) one rune, pointed to by r, to at most
-// UTFmax bytes starting at s and returns the number of bytes generated.
-
-int runetochar(char* s, const Rune* r);
-
-
-// chartorune copies (decodes) at most UTFmax bytes starting at s to
-// one rune, pointed to by r, and returns the number of bytes consumed.
-// If the input is not exactly in UTF format, chartorune will set *r
-// to Runeerror and return 1.
-//
-// Note: There is no special case for a "null-terminated" string. A
-// string whose first byte has the value 0 is the UTF8 encoding of the
-// Unicode value 0 (i.e., ASCII NULL). A byte value of 0 is illegal
-// anywhere else in a UTF sequence.
-
-int chartorune(Rune* r, const char* s);
-
-
-// charntorune is like chartorune, except that it will access at most
-// n bytes of s. If the UTF sequence is incomplete within n bytes,
-// charntorune will set *r to Runeerror and return 0. If it is complete
-// but not in UTF format, it will set *r to Runeerror and return 1.
-//
-// Added 2004-09-24 by Wei-Hwa Huang
-
-int charntorune(Rune* r, const char* s, int n);
-
-// isvalidcharntorune(str, n, r, consumed)
-// is a convenience function that calls "*consumed = charntorune(r, str, n)"
-// and returns an int (logically boolean) indicating whether the first
-// n bytes of str was a valid and complete UTF sequence.
-
-int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
-
-// runelen returns the number of bytes required to convert r into UTF.
-
-int runelen(Rune r);
-
-
-// runenlen returns the number of bytes required to convert the n
-// runes pointed to by r into UTF.
-
-int runenlen(const Rune* r, int n);
-
-
-// fullrune returns 1 if the string s of length n is long enough to be
-// decoded by chartorune, and 0 otherwise. This does not guarantee
-// that the string contains a legal UTF encoding. This routine is used
-// by programs that obtain input one byte at a time and need to know
-// when a full rune has arrived.
-
-int fullrune(const char* s, int n);
-
-// The following routines are analogous to the corresponding string
-// routines with "utf" substituted for "str", and "rune" substituted
-// for "chr".
-
-// utflen returns the number of runes that are represented by the UTF
-// string s. (cf. strlen)
-
-int utflen(const char* s);
-
-
-// utfnlen returns the number of complete runes that are represented
-// by the first n bytes of the UTF string s. If the last few bytes of
-// the string contain an incompletely coded rune, utfnlen will not
-// count them; in this way, it differs from utflen, which includes
-// every byte of the string. (cf. strnlen)
-
-int utfnlen(const char* s, long n);
-
-
-// utfrune returns a pointer to the first occurrence of rune r in the
-// UTF string s, or 0 if r does not occur in the string. The NULL
-// byte terminating a string is considered to be part of the string s.
-// (cf. strchr)
-
-const char* utfrune(const char* s, Rune r);
-
-
-// utfrrune returns a pointer to the last occurrence of rune r in the
-// UTF string s, or 0 if r does not occur in the string. The NULL
-// byte terminating a string is considered to be part of the string s.
-// (cf. strrchr)
-
-const char* utfrrune(const char* s, Rune r);
-
-
-// utfutf returns a pointer to the first occurrence of the UTF string
-// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
-// null string, utfutf returns s1. (cf. strstr)
-
-const char* utfutf(const char* s1, const char* s2);
-
-
-// utfecpy copies UTF sequences until a null sequence has been copied,
-// but writes no sequences beyond es1. If any sequences are copied,
-// s1 is terminated by a null sequence, and a pointer to that sequence
-// is returned. Otherwise, the original s1 is returned. (cf. strecpy)
-
-char* utfecpy(char *s1, char *es1, const char *s2);
-
-
-
-// These functions are rune-string analogues of the corresponding
-// functions in strcat (3).
-//
-// These routines first appeared in Plan 9.
-// SEE ALSO
-// memmove (3)
-// rune (3)
-// strcat (2)
-//
-// BUGS: The outcome of overlapping moves varies among implementations.
-
-Rune* runestrcat(Rune* s1, const Rune* s2);
-Rune* runestrncat(Rune* s1, const Rune* s2, long n);
-
-const Rune* runestrchr(const Rune* s, Rune c);
-
-int runestrcmp(const Rune* s1, const Rune* s2);
-int runestrncmp(const Rune* s1, const Rune* s2, long n);
-
-Rune* runestrcpy(Rune* s1, const Rune* s2);
-Rune* runestrncpy(Rune* s1, const Rune* s2, long n);
-Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2);
-
-Rune* runestrdup(const Rune* s);
-
-const Rune* runestrrchr(const Rune* s, Rune c);
-long runestrlen(const Rune* s);
-const Rune* runestrstr(const Rune* s1, const Rune* s2);
-
-
-
-// The following routines test types and modify cases for Unicode
-// characters. Unicode defines some characters as letters and
-// specifies three cases: upper, lower, and title. Mappings among the
-// cases are also defined, although they are not exhaustive: some
-// upper case letters have no lower case mapping, and so on. Unicode
-// also defines several character properties, a subset of which are
-// checked by these routines. These routines are based on Unicode
-// version 3.0.0.
-//
-// NOTE: The routines are implemented in C, so the boolean functions
-// (e.g., isupperrune) return 0 for false and 1 for true.
-//
-//
-// toupperrune, tolowerrune, and totitlerune are the Unicode case
-// mappings. These routines return the character unchanged if it has
-// no defined mapping.
-
-Rune toupperrune(Rune r);
-Rune tolowerrune(Rune r);
-Rune totitlerune(Rune r);
-
-
-// isupperrune tests for upper case characters, including Unicode
-// upper case letters and targets of the toupper mapping. islowerrune
-// and istitlerune are defined analogously.
-
-int isupperrune(Rune r);
-int islowerrune(Rune r);
-int istitlerune(Rune r);
-
-
-// isalpharune tests for Unicode letters; this includes ideographs in
-// addition to alphabetic characters.
-
-int isalpharune(Rune r);
-
-
-// isdigitrune tests for digits. Non-digit numbers, such as Roman
-// numerals, are not included.
-
-int isdigitrune(Rune r);
-
-
-// isideographicrune tests for ideographic characters and numbers, as
-// defined by the Unicode standard.
-
-int isideographicrune(Rune r);
-
-
-// isspacerune tests for whitespace characters, including "C" locale
-// whitespace, Unicode defined whitespace, and the "zero-width
-// non-break space" character.
-
-int isspacerune(Rune r);
-
-
-// (The comments in this file were copied from the manpage files rune.3,
-// isalpharune.3, and runestrcat.3. Some formatting changes were also made
-// to conform to Google style. /JRM 11/11/05)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/third_party/libphonenumber/cpp/src/utf/utfdef.h b/third_party/libphonenumber/cpp/src/utf/utfdef.h
deleted file mode 100644
index adc6d95..0000000
--- a/third_party/libphonenumber/cpp/src/utf/utfdef.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * The authors of this software are Rob Pike and Ken Thompson.
- * Copyright (c) 1998-2002 by Lucent Technologies.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
- * is included in all copies of any software which is or includes a copy
- * or modification of this software and in all copies of the supporting
- * documentation for such software.
- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */
-
-/*
- * Each short type name is first #defined to a _utf-prefixed name, so the
- * typedefs further down actually declare _utfuchar, _utfushort, _utfuint
- * and _utfulong.
- * NOTE(review): presumably this avoids clashes with same-named typedefs
- * from system headers -- confirm.
- */
-#define uchar _utfuchar
-#define ushort _utfushort
-#define uint _utfuint
-#define ulong _utfulong
-/* vlong and uvlong are renamed here, but no typedef for them is present in this file. */
-#define vlong _utfvlong
-#define uvlong _utfuvlong
-
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-
-/* nelem(x): number of elements in x, computed as whole size over element size. */
-#define nelem(x) (sizeof(x)/sizeof((x)[0]))
-/* nil: a null pointer constant of type void*. */
-#define nil ((void*)0)