summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- third_party/cld/cld.gyp 4
-rw-r--r-- third_party/cld/encodings/compact_lang_det/string_byte_sink.cc 17
-rw-r--r-- third_party/cld/encodings/compact_lang_det/string_byte_sink.h 26
-rw-r--r-- third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc 9
4 files changed, 52 insertions, 4 deletions
diff --git a/third_party/cld/cld.gyp b/third_party/cld/cld.gyp
index bdd4ef6..b2db4c7 100644
--- a/third_party/cld/cld.gyp
+++ b/third_party/cld/cld.gyp
@@ -1,4 +1,4 @@
-# Copyright (c) 2009 The Chromium Authors. All rights reserved.
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -34,6 +34,8 @@
'encodings/compact_lang_det/getonescriptspan.h',
'encodings/compact_lang_det/letterscript_enum.cc',
'encodings/compact_lang_det/letterscript_enum.h',
+ 'encodings/compact_lang_det/string_byte_sink.cc',
+ 'encodings/compact_lang_det/string_byte_sink.h',
'encodings/compact_lang_det/subsetsequence.cc',
'encodings/compact_lang_det/subsetsequence.h',
'encodings/compact_lang_det/tote.cc',
diff --git a/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc b/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc
new file mode 100644
index 0000000..755778e
--- /dev/null
+++ b/third_party/cld/encodings/compact_lang_det/string_byte_sink.cc
@@ -0,0 +1,17 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "encodings/compact_lang_det/string_byte_sink.h"
+
+#include <string>
+
+using std::string;
+
+StringByteSink::StringByteSink(string* dest) : dest_(dest) {}
+
+StringByteSink::~StringByteSink() {}
+
+void StringByteSink::Append(const char* data, int32_t n) {
+ dest_->append(data, n);
+}
diff --git a/third_party/cld/encodings/compact_lang_det/string_byte_sink.h b/third_party/cld/encodings/compact_lang_det/string_byte_sink.h
new file mode 100644
index 0000000..25c8181
--- /dev/null
+++ b/third_party/cld/encodings/compact_lang_det/string_byte_sink.h
@@ -0,0 +1,26 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_
+#define ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_
+
+#include <string>
+
+#include <unicode/unistr.h>
+
+// Implementation of a string byte sink needed when ICU is compiled without
+// support for std::string which is the case on Android.
+class StringByteSink : public icu::ByteSink {
+ public:
+ // Constructs a ByteSink that will append bytes to the dest string.
+ explicit StringByteSink(std::string* dest);
+ virtual ~StringByteSink();
+
+ virtual void Append(const char* data, int32_t n);
+
+ private:
+ std::string* const dest_;
+};
+
+#endif // ENCODINGS_COMPACT_LANG_DET_STRING_BYTE_SINK_H_
diff --git a/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc b/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc
index 9da01f7..e5f08da 100644
--- a/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc
+++ b/third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -8,6 +8,7 @@
#include <vector> // to compile bar/common/component.h
#include "encodings/compact_lang_det/compact_lang_det.h"
+#include "encodings/compact_lang_det/string_byte_sink.h"
#include "base/string_util.h"
#include "unicode/normlzr.h"
#include "unicode/unistr.h"
@@ -23,11 +24,13 @@ std::string NormalizeText(const UChar* text) {
return std::string();
normalized.toLower();
std::string utf8;
- // Internally, toUTF8String uses a 1kB stack buffer (which is not large enough
+ // Internally, toUTF8 uses a 1kB stack buffer (which is not large enough
// for most web pages) and does pre-flighting followed by malloc for larger
// strings. We have to switch to obtaining the buffer with the maximum size
// (UTF-16 length * 3) without pre-flighting if necessary.
- return normalized.toUTF8String(utf8);
+ StringByteSink sink(&utf8);
+ normalized.toUTF8(sink);
+ return utf8;
}