diff options
4 files changed, 52 insertions, 4 deletions
diff --git a/third_party/cld/cld.gyp b/third_party/cld/cld.gyp index bdd4ef6..b2db4c7 100644 --- a/third_party/cld/cld.gyp +++ b/third_party/cld/cld.gyp @@ -1,4 +1,4 @@ -# Copyright (c) 2009 The Chromium Authors. All rights reserved. +# Copyright (c) 2012 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -34,6 +34,8 @@ 'encodings/compact_lang_det/getonescriptspan.h', 'encodings/compact_lang_det/letterscript_enum.cc', 'encodings/compact_lang_det/letterscript_enum.h', + 'encodings/compact_lang_det/string_byte_sink.cc', + 'encodings/compact_lang_det/string_byte_sink.h', 'encodings/compact_lang_det/subsetsequence.cc', 'encodings/compact_lang_det/subsetsequence.h', 'encodings/compact_lang_det/tote.cc', diff --git a/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc b/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc new file mode 100644 index 0000000..755778e --- /dev/null +++ b/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc @@ -0,0 +1,17 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "encodings/compact_lang_det/string_byte_sink.h" + +#include <string> + +using std::string; + +StringByteSink::StringByteSink(string* dest) : dest_(dest) {} + +StringByteSink::~StringByteSink() {} + +void StringByteSink::Append(const char* data, int32_t n) { + dest_->append(data, n); +} diff --git a/third_party/cld/encodings/compact_lang_det/string_byte_sink.h b/third_party/cld/encodings/compact_lang_det/string_byte_sink.h new file mode 100644 index 0000000..25c8181 --- /dev/null +++ b/third_party/cld/encodings/compact_lang_det/string_byte_sink.h @@ -0,0 +1,26 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_ +#define ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_ + +#include <string> + +#include <unicode/unistr.h> + +// Implementation of a string byte sink needed when ICU is compiled without +// support for std::string which is the case on Android. +class StringByteSink : public icu::ByteSink { + public: + // Constructs a ByteSink that will append bytes to the dest string. + explicit StringByteSink(std::string* dest); + virtual ~StringByteSink(); + + virtual void Append(const char* data, int32_t n); + + private: + std::string* const dest_; +}; + +#endif // ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_ diff --git a/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc b/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc index 9da01f7..e5f08da 100644 --- a/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +++ b/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -8,6 +8,7 @@ #include <vector> // to compile bar/common/component.h #include "encodings/compact_lang_det/compact_lang_det.h" +#include "encodings/compact_lang_det/string_byte_sink.h" #include "base/string_util.h" #include "unicode/normlzr.h" #include "unicode/unistr.h" @@ -23,11 +24,13 @@ std::string NormalizeText(const UChar* text) { return std::string(); normalized.toLower(); std::string utf8; - // Internally, toUTF8String uses a 1kB stack buffer (which is not large enough + // Internally, toUTF8 uses a 1kB stack buffer (which is not large enough // for most web pages) and does pre-flighting followed by malloc for larger // strings. We have to switch to obtaining the buffer with the maximum size // (UTF-16 length * 3) without pre-flighting if necessary. - return normalized.toUTF8String(utf8); + StringByteSink sink(&utf8); + normalized.toUTF8(sink); + return utf8; } |