diff options
author | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-11-04 00:05:29 +0000 |
---|---|---|
committer | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-11-04 00:05:29 +0000 |
commit | 2fee0707b35485dbf78d1362258412c6f63ada22 (patch) | |
tree | 3d77430efae27da95214aea1a03dcab7409b9ae4 /third_party | |
parent | 8ac2a02fd8f0e4355d70ae6c51d2c1c51f92b64a (diff) | |
download | chromium_src-2fee0707b35485dbf78d1362258412c6f63ada22.zip chromium_src-2fee0707b35485dbf78d1362258412c6f63ada22.tar.gz chromium_src-2fee0707b35485dbf78d1362258412c6f63ada22.tar.bz2 |
Make CLD work properly on ARM
The patch is adapted from the upstream change.
BUG=50113
TEST=1. Apply the CL and build Chrome on ARM and visit the page listed in the bug. Chrome should not freeze.
2. Run unit_tests --gtest_filter=CompactLang*.*
Review URL: http://codereview.chromium.org/3325020
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@108586 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party')
4 files changed, 61 insertions, 36 deletions
diff --git a/third_party/cld/base/basictypes.h b/third_party/cld/base/basictypes.h index 617b669..287d1c2 100644 --- a/third_party/cld/base/basictypes.h +++ b/third_party/cld/base/basictypes.h @@ -344,5 +344,23 @@ namespace base { enum LinkerInitialized { LINKER_INITIALIZED }; } // base +// UnalignedLoad32 is put here instead of base/port.h to +// avoid the circular dependency between port.h and basictypes.h +// ARM does not support unaligned memory access. +#if defined(ARCH_CPU_X86_FAMILY) +// x86 and x86-64 can perform unaligned loads/stores directly; +inline uint32 UnalignedLoad32(const void* p) { + return *reinterpret_cast<const uint32*>(p); +} +#else +#define NEED_ALIGNED_LOADS +// If target architecture does not support unaligned loads and stores, +// use memcpy version of UnalignedLoad32. +inline uint32 UnalignedLoad32(const void* p) { + uint32 t; + memcpy(&t, reinterpret_cast<const uint8*>(p), sizeof(t)); + return t; +} +#endif #endif // BASE_BASICTYPES_H_ diff --git a/third_party/cld/encodings/compact_lang_det/cldutil.cc b/third_party/cld/encodings/compact_lang_det/cldutil.cc index 5bbed82..06e9e97 100644 --- a/third_party/cld/encodings/compact_lang_det/cldutil.cc +++ b/third_party/cld/encodings/compact_lang_det/cldutil.cc @@ -4,6 +4,8 @@ #include <string> #include "encodings/compact_lang_det/cldutil.h" + +#include "base/basictypes.h" #include "encodings/compact_lang_det/cldutil_dbg.h" #include "encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h" #include "encodings/compact_lang_det/utf8propletterscriptnum.h" @@ -95,17 +97,16 @@ uint32 cld::BiHashV25(const char* word_ptr, int bytecount) { if (bytecount == 0) { return 0; } - const uint32* word_ptr32 = reinterpret_cast<const uint32*>(word_ptr); uint32 word0, word1; if (bytecount <= 4) { - word0 = word_ptr32[0] & kWordMask0[bytecount & 3]; + word0 = UnalignedLoad32(word_ptr) & kWordMask0[bytecount & 3]; word0 = word0 ^ (word0 >> 3); return word0; } // Else do 8 bytes - word0 = 
word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 4) & kWordMask0[bytecount & 3]; word1 = word1 ^ (word1 << 18); return word0 + word1; } @@ -153,25 +154,24 @@ uint32 cld::BiHashV25(const char* word_ptr, int bytecount) { // OVERSHOOTS up to 3 bytes // For runtime use of tables uint32 QuadHashV25Mix(const char* word_ptr, int bytecount, uint32 prepost) { - const uint32* word_ptr32 = reinterpret_cast<const uint32*>(word_ptr); uint32 word0, word1, word2; if (bytecount <= 4) { - word0 = word_ptr32[0] & kWordMask0[bytecount & 3]; + word0 = UnalignedLoad32(word_ptr) & kWordMask0[bytecount & 3]; word0 = word0 ^ (word0 >> 3); return word0 ^ prepost; } else if (bytecount <= 8) { - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 4) & kWordMask0[bytecount & 3]; word1 = word1 ^ (word1 << 4); return (word0 ^ prepost) + word1; } // else do 12 bytes - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1]; + word1 = UnalignedLoad32(word_ptr + 4); word1 = word1 ^ (word1 << 4); - word2 = word_ptr32[2] & kWordMask0[bytecount & 3]; + word2 = UnalignedLoad32(word_ptr + 8) & kWordMask0[bytecount & 3]; word2 = word2 ^ (word2 << 2); return (word0 ^ prepost) + word1 + word2; } @@ -223,7 +223,6 @@ uint32 cld::QuadHashV25Underscore(const char* word_ptr, int bytecount) { // The high 8 bits are a simple sum of all bytes, shifted by 0/1/2/3 bits each // For runtime use of tables V3 uint64 OctaHash40Mix(const char* word_ptr, int bytecount, uint64 prepost) { - const uint32* word_ptr32 = reinterpret_cast<const uint32*>(word_ptr); uint64 word0; uint64 word1; uint64 sum; @@ -232,91 +231,91 @@ uint64 OctaHash40Mix(const char* word_ptr, int bytecount, uint64 prepost) { if (word_ptr[bytecount] == 
' ') {prepost |= kPostSpaceIndicator;} switch ((bytecount - 1) >> 2) { case 0: // 1..4 bytes - word0 = word_ptr32[0] & kWordMask0[bytecount & 3]; + word0 = UnalignedLoad32(word_ptr) & kWordMask0[bytecount & 3]; sum = word0; word0 = word0 ^ (word0 >> 3); break; case 1: // 5..8 bytes - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); sum = word0; word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 4) & kWordMask0[bytecount & 3]; sum += word1; word1 = word1 ^ (word1 << 4); word0 += word1; break; case 2: // 9..12 bytes - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); sum = word0; word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1]; + word1 = UnalignedLoad32(word_ptr + 4); sum += word1; word1 = word1 ^ (word1 << 4); word0 += word1; - word1 = word_ptr32[2] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 8) & kWordMask0[bytecount & 3]; sum += word1; word1 = word1 ^ (word1 << 2); word0 += word1; break; case 3: // 13..16 bytes - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); sum = word0; word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1]; + word1 = UnalignedLoad32(word_ptr + 4); sum += word1; word1 = word1 ^ (word1 << 4); word0 += word1; - word1 = word_ptr32[2]; + word1 = UnalignedLoad32(word_ptr + 8); sum += word1; word1 = word1 ^ (word1 << 2); word0 += word1; - word1 = word_ptr32[3] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 12) & kWordMask0[bytecount & 3]; sum += word1; word1 = word1 ^ (word1 >> 8); word0 += word1; break; case 4: // 17..20 bytes - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); sum = word0; word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1]; + word1 = UnalignedLoad32(word_ptr + 4); sum += word1; word1 = word1 ^ (word1 << 4); word0 += word1; - word1 = word_ptr32[2]; + word1 = UnalignedLoad32(word_ptr + 8); sum += word1; word1 = word1 ^ (word1 << 2); word0 += word1; - word1 = word_ptr32[3]; + 
word1 = UnalignedLoad32(word_ptr + 12); sum += word1; word1 = word1 ^ (word1 >> 8); word0 += word1; - word1 = word_ptr32[4] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 16) & kWordMask0[bytecount & 3]; sum += word1; word1 = word1 ^ (word1 >> 4); word0 += word1; break; default: // 21..24 bytes and higher (ignores beyond 24) - word0 = word_ptr32[0]; + word0 = UnalignedLoad32(word_ptr); sum = word0; word0 = word0 ^ (word0 >> 3); - word1 = word_ptr32[1]; + word1 = UnalignedLoad32(word_ptr + 4); sum += word1; word1 = word1 ^ (word1 << 4); word0 += word1; - word1 = word_ptr32[2]; + word1 = UnalignedLoad32(word_ptr + 8); sum += word1; word1 = word1 ^ (word1 << 2); word0 += word1; - word1 = word_ptr32[3]; + word1 = UnalignedLoad32(word_ptr + 12); sum += word1; word1 = word1 ^ (word1 >> 8); word0 += word1; - word1 = word_ptr32[4]; + word1 = UnalignedLoad32(word_ptr + 16); sum += word1; word1 = word1 ^ (word1 >> 4); word0 += word1; - word1 = word_ptr32[5] & kWordMask0[bytecount & 3]; + word1 = UnalignedLoad32(word_ptr + 20) & kWordMask0[bytecount & 3]; sum += word1; word1 = word1 ^ (word1 >> 6); word0 += word1; diff --git a/third_party/cld/encodings/compact_lang_det/getonescriptspan.cc b/third_party/cld/encodings/compact_lang_det/getonescriptspan.cc index d947d21..29a3603 100644 --- a/third_party/cld/encodings/compact_lang_det/getonescriptspan.cc +++ b/third_party/cld/encodings/compact_lang_det/getonescriptspan.cc @@ -6,6 +6,7 @@ #include <stdio.h> #include <string.h> +#include "base/basictypes.h" #include "encodings/lang_enc.h" #include "encodings/compact_lang_det/utf8propjustletter.h" #include "encodings/compact_lang_det/utf8propletterscriptnum.h" @@ -339,6 +340,11 @@ int ScriptScanner::SkipToFrontOfSpan(const char* src, int len, int* script) { return skip; } +#ifdef NEED_ALIGNED_LOADS +static const bool kNeedsAlignedLoads = true; +#else +static const bool kNeedsAlignedLoads = false; +#endif // Copy next run of same-script non-tag letters to buffer 
[NUL terminated] @@ -409,7 +415,7 @@ bool ScriptScanner::GetOneScriptSpan(getone::LangSpan* span) { // Real letter, safely copy up to 4 bytes, increment by 1..4 // Will update by 1..4 bytes at Advance, below tlen = plen = cld_UniLib::OneCharLen(next_byte_ + take); - if (take < (byte_length_ - 3)) { + if (!kNeedsAlignedLoads && (take < (byte_length_ - 3))) { // Fast case *reinterpret_cast<uint32*>(script_buffer_ + put) = *reinterpret_cast<const uint32*>(next_byte_ + take); diff --git a/third_party/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc b/third_party/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc index 58889e9..1577a86 100644 --- a/third_party/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +++ b/third_party/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc @@ -4,6 +4,8 @@ #include "encodings/compact_lang_det/win/cld_utf8statetable.h" +#include "base/basictypes.h" + // Return true if current Tbl pointer is within state0 range // Note that unsigned compare checks both ends of range simultaneously static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) { @@ -158,8 +160,8 @@ DoAgain: uint32 losub = st->losub; uint32 hiadd = st->hiadd; while (src < srclimit8) { - uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0]; - uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1]; + uint32 s0123 = UnalignedLoad32(src); + uint32 s4567 = UnalignedLoad32(src + 4); src += 8; // This is a fast range check for all bytes in [lowsub..0x80-hiadd) uint32 temp = (s0123 - losub) | (s0123 + hiadd) | |