Be more lenient with 4 byte UTF-8 sequences.

Accept 4 byte sequences and convert them into surrogate pairs instead of expecting 2 separate 3 byte sequences each encoding one half of a surrogate pair. Note that in addition to supporting 4 byte sequences in strings from JNI, we also tolerate them in dex files. This is mainly for consistency, and there's no need to claim any sort of official support. bug: 18848397 bug: https://code.google.com/p/android/issues/detail?id=81341 Change-Id: Ibc98d29e59d98803e640f2489ea4c56912a59b29
author: Narayan Kamath <narayan@google.com> 2015-01-29 20:06:46 +0000
committer: Narayan Kamath <narayan@google.com> 2015-02-12 11:54:37 +0000
commit: a5afcfc73141e5e378d79a326d02c5c2039fb025 (patch)
tree: 424add9558fb816c4f1d2f4edd128f4f2a086d9a /runtime/jni_internal_test.cc
parent: 5a3399deaf448c8434d9ba0916ff799b1b791d95 (diff)
download: art-a5afcfc73141e5e378d79a326d02c5c2039fb025.zip
art-a5afcfc73141e5e378d79a326d02c5c2039fb025.tar.gz
art-a5afcfc73141e5e378d79a326d02c5c2039fb025.tar.bz2
1 files changed, 30 insertions, 1 deletions
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 906aa4c..1048214 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1351,7 +1351,36 @@ TEST_F(JniInternalTest, NewStringUTF) {
   EXPECT_EQ(5, env_->GetStringLength(s));
   EXPECT_EQ(5, env_->GetStringUTFLength(s));
 
-  // TODO: check some non-ASCII strings.
+  // Encoded surrogate pair.
+  s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
+  EXPECT_NE(s, nullptr);
+  EXPECT_EQ(2, env_->GetStringLength(s));
+  // Note that this uses 2 x 3 byte UTF sequences, one
+  // for each half of the surrogate pair.
+  EXPECT_EQ(6, env_->GetStringUTFLength(s));
+  const char* chars = env_->GetStringUTFChars(s, nullptr);
+  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80", chars);
+  env_->ReleaseStringUTFChars(s, chars);
+
+  // 4 byte UTF sequence appended to an encoded surrogate pair.
+  s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80 \xf0\x9f\x8f\xa0");
+  EXPECT_NE(s, nullptr);
+  EXPECT_EQ(5, env_->GetStringLength(s));
+  EXPECT_EQ(13, env_->GetStringUTFLength(s));
+  chars = env_->GetStringUTFChars(s, nullptr);
+  // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
+  // pair {0xd83c, 0xdfe0} which is then converted into two three byte
+  // sequences {0xed, 0xa0, 0xbc} and {0xed, 0xbf, 0xa0}, one for each half of
+  // the surrogate pair.
+  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80 \xed\xa0\xbc\xed\xbf\xa0", chars);
+  env_->ReleaseStringUTFChars(s, chars);
+
+  // A string with 1, 2, 3 and 4 byte UTF sequences with spaces
+  // between them.
+  s = env_->NewStringUTF("\x24 \xc2\xa2 \xe2\x82\xac \xf0\x9f\x8f\xa0");
+  EXPECT_NE(s, nullptr);
+  EXPECT_EQ(8, env_->GetStringLength(s));
+  EXPECT_EQ(15, env_->GetStringUTFLength(s));
 }
 
 TEST_F(JniInternalTest, NewString) {
author	Narayan Kamath <narayan@google.com>	2015-01-29 20:06:46 +0000
committer	Narayan Kamath <narayan@google.com>	2015-02-12 11:54:37 +0000
commit	a5afcfc73141e5e378d79a326d02c5c2039fb025 (patch)
tree	424add9558fb816c4f1d2f4edd128f4f2a086d9a /runtime/jni_internal_test.cc
parent	5a3399deaf448c8434d9ba0916ff799b1b791d95 (diff)
download	art-a5afcfc73141e5e378d79a326d02c5c2039fb025.zip art-a5afcfc73141e5e378d79a326d02c5c2039fb025.tar.gz art-a5afcfc73141e5e378d79a326d02c5c2039fb025.tar.bz2