From c29962f2b123f4d22798046022ad91b8c1774d32 Mon Sep 17 00:00:00 2001 From: "pkasting@chromium.org" Date: Wed, 3 Dec 2008 00:47:58 +0000 Subject: Make WordIterator and Snippet::MatchPositions use size_t instead of int for offsets into strings. This avoids some casts. I also added a typedef for Snippet::MatchPosition which cleans up a bit of the calling code a little. Review URL: http://codereview.chromium.org/13064 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@6260 0039d316-1c4b-4281-b951-d872f2087c98 --- .../autocomplete/history_contents_provider.cc | 2 +- chrome/browser/history/query_parser.cc | 10 +-- chrome/browser/history/query_parser.h | 2 +- chrome/browser/history/snippet.cc | 73 +++++++++++----------- chrome/browser/history/snippet.h | 7 ++- chrome/browser/history/snippet_unittest.cc | 12 ++-- chrome/browser/history_view.cc | 13 ++-- 7 files changed, 59 insertions(+), 60 deletions(-) (limited to 'chrome/browser') diff --git a/chrome/browser/autocomplete/history_contents_provider.cc b/chrome/browser/autocomplete/history_contents_provider.cc index ce409eb..9eef77c 100644 --- a/chrome/browser/autocomplete/history_contents_provider.cc +++ b/chrome/browser/autocomplete/history_contents_provider.cc @@ -218,7 +218,7 @@ void HistoryContentsProvider::ClassifyDescription( // Classify matches in the title. for (Snippet::MatchPositions::const_iterator i = title_matches.begin(); i != title_matches.end(); ++i) { - if (static_cast(i->first) != offset) { + if (i->first != offset) { match->description_class.push_back( ACMatchClassification(offset, ACMatchClassification::NONE)); } diff --git a/chrome/browser/history/query_parser.cc b/chrome/browser/history/query_parser.cc index d33aea6..4d79a4c 100644 --- a/chrome/browser/history/query_parser.cc +++ b/chrome/browser/history/query_parser.cc @@ -60,10 +60,10 @@ bool QueryNodeWord::HasMatchIn(const std::vector& words, Snippet::MatchPositions* match_positions) const { for (size_t i = 0; i < words.size(); ++i) { if (Matches(words[i].word, false)) { - int match_start = words[i].position; + size_t match_start = words[i].position; match_positions->push_back( - std::pair(match_start, - match_start + static_cast(word_.size()))); + Snippet::MatchPosition(match_start, + match_start + static_cast(word_.size()))); return true; } } @@ -176,8 +176,8 @@ bool QueryNodePhrase::HasMatchIn( if (matched_all) { const QueryWord& last_word = words[i + children_.size() - 1]; match_positions->push_back( - std::pair(words[i].position, - last_word.position + last_word.word.length())); + Snippet::MatchPosition(words[i].position, + last_word.position + last_word.word.length())); return true; } } diff --git a/chrome/browser/history/query_parser.h b/chrome/browser/history/query_parser.h index a6a0a11..d374ba4 100644 --- a/chrome/browser/history/query_parser.h +++ b/chrome/browser/history/query_parser.h @@ -21,7 +21,7 @@ struct QueryWord { std::wstring word; // The starting position of the word in the original text. - int position; + size_t position; }; // QueryNode is used by QueryNodeParser to represent the elements that diff --git a/chrome/browser/history/snippet.cc b/chrome/browser/history/snippet.cc index d72209e..a95de81 100644 --- a/chrome/browser/history/snippet.cc +++ b/chrome/browser/history/snippet.cc @@ -15,8 +15,8 @@ namespace { -bool PairFirstLessThan(const std::pair& a, - const std::pair& b) { +bool PairFirstLessThan(const Snippet::MatchPosition& a, + const Snippet::MatchPosition& b) { return a.first < b.first; } @@ -25,7 +25,7 @@ bool PairFirstLessThan(const std::pair& a, void CoalescePositionsFrom(size_t offset, Snippet::MatchPositions* match_positions) { DCHECK(offset < match_positions->size()); - std::pair& pair((*match_positions)[offset]); + Snippet::MatchPosition& pair((*match_positions)[offset]); ++offset; while (offset < match_positions->size() && pair.second >= (*match_positions)[offset].first) { @@ -37,9 +37,12 @@ void CoalescePositionsFrom(size_t offset, // Makes sure there is a pair in match_positions that contains the specified // range. This keeps the pairs ordered in match_positions by first, and makes // sure none of the pairs in match_positions touch each other. -void AddMatch(int start, int end, Snippet::MatchPositions* match_positions) { - DCHECK(start < end && match_positions); - std::pair pair(start, end); +void AddMatch(size_t start, + size_t end, + Snippet::MatchPositions* match_positions) { + DCHECK(start < end); + DCHECK(match_positions); + Snippet::MatchPosition pair(start, end); if (match_positions->empty()) { match_positions->push_back(pair); return; @@ -97,11 +100,11 @@ void AddMatch(int start, int end, Snippet::MatchPositions* match_positions) { // matches offset. // wide_pos: current index in the wide string. This is the same as the return // value. -int AdvanceAndReturnWidePos(const char* utf8_string, - int utf8_length, - int offset, - int* utf8_pos, - int* wide_pos) { +size_t AdvanceAndReturnWidePos(const char* utf8_string, + int32_t utf8_length, + int32_t offset, + int32_t* utf8_pos, + size_t* wide_pos) { DCHECK(offset >= *utf8_pos && offset <= utf8_length); UChar32 wide_char; @@ -115,7 +118,7 @@ int AdvanceAndReturnWidePos(const char* utf8_string, // Given a character break iterator over a UTF-8 string, set the iterator // position to |*utf8_pos| and move by |count| characters. |count| can // be either positive or negative. -void MoveByNGraphemes(BreakIterator* bi, int count, int* utf8_pos) { +void MoveByNGraphemes(BreakIterator* bi, int count, size_t* utf8_pos) { // Ignore the return value. A side effect of the current position // being set at or following |*utf8_pos| is exploited here. // It's simpler than calling following(n) and then previous(). @@ -123,7 +126,7 @@ void MoveByNGraphemes(BreakIterator* bi, int count, int* utf8_pos) { // snippet generation. If not, revisit the way we scan in ComputeSnippet. bi->isBoundary(*utf8_pos); bi->next(count); - *utf8_pos = static_cast(bi->current()); + *utf8_pos = static_cast(bi->current()); } // The amount of context to include for a given hit. Note that it's counted @@ -134,8 +137,8 @@ const int kSnippetContext = 50; // from the previous match. The window size is counted in terms // of graphemes rather than bytes in UTF-8. bool IsNextMatchWithinSnippetWindow(BreakIterator* bi, - int previous_match_end, - int next_match_start) { + size_t previous_match_end, + size_t next_match_start) { // If it's within a window in terms of bytes, it's certain // that it's within a window in terms of graphemes as well. if (next_match_start < previous_match_end + kSnippetContext) @@ -168,8 +171,8 @@ void Snippet::ExtractMatchPositions(const std::string& offsets_str, for (size_t i = 0; i < offsets.size() - 3; i += 4) { if (offsets[i] != column_num) continue; - const int start = atoi(offsets[i+2].c_str()); - const int end = start + atoi(offsets[i+3].c_str()); + const size_t start = atoi(offsets[i + 2].c_str()); + const size_t end = start + atoi(offsets[i + 3].c_str()); AddMatch(start, end, match_positions); } } @@ -179,17 +182,16 @@ void Snippet::ConvertMatchPositionsToWide( const std::string& utf8_string, Snippet::MatchPositions* match_positions) { DCHECK(match_positions); - int utf8_pos = 0; - int wide_pos = 0; + int32_t utf8_pos = 0; + size_t wide_pos = 0; const char* utf8_cstring = utf8_string.c_str(); - const int utf8_length = static_cast(utf8_string.size()); + const int32_t utf8_length = static_cast(utf8_string.size()); for (Snippet::MatchPositions::iterator i = match_positions->begin(); i != match_positions->end(); ++i) { i->first = AdvanceAndReturnWidePos(utf8_cstring, utf8_length, i->first, &utf8_pos, &wide_pos); - i->second = - AdvanceAndReturnWidePos(utf8_cstring, utf8_length, i->second, &utf8_pos, - &wide_pos); + i->second = AdvanceAndReturnWidePos(utf8_cstring, utf8_length, + i->second, &utf8_pos, &wide_pos); } } @@ -198,17 +200,12 @@ void Snippet::ComputeSnippet(const MatchPositions& match_positions, // The length of snippets we try to produce. // We can generate longer snippets but stop once we cross kSnippetMaxLength. const size_t kSnippetMaxLength = 200; - - const std::wstring kEllipsis = L" ... "; - // Grab the size as an int to cut down on casts later. - const int document_size = static_cast(document.size()); - UText* document_utext = NULL; UErrorCode status = U_ZERO_ERROR; document_utext = utext_openUTF8(document_utext, document.data(), - document_size, &status); + document.size(), &status); // Locale does not matter because there's no per-locale customization // for character iterator. scoped_ptr bi( @@ -220,14 +217,14 @@ void Snippet::ComputeSnippet(const MatchPositions& match_positions, // context around each match. If matches are near enough each other (within // kSnippetContext), we skip the "..." between them. std::wstring snippet; - int start = 0; + size_t start = 0; for (size_t i = 0; i < match_positions.size(); ++i) { // Some shorter names for the current match. - const int match_start = match_positions[i].first; - const int match_end = match_positions[i].second; + const size_t match_start = match_positions[i].first; + const size_t match_end = match_positions[i].second; // Add the context, if any, to show before the match. - int context_start = match_start; + size_t context_start = match_start; MoveByNGraphemes(bi.get(), -kSnippetContext, &context_start); start = std::max(start, context_start); if (start < match_start) { @@ -237,17 +234,17 @@ void Snippet::ComputeSnippet(const MatchPositions& match_positions, } // Add the match. - matches_.push_back(std::make_pair(static_cast(snippet.size()), 0)); + const size_t first = snippet.size(); snippet += UTF8ToWide(document.substr(match_start, match_end - match_start)); - matches_.back().second = static_cast(snippet.size()); + matches_.push_back(std::make_pair(first, snippet.size())); // Compute the context, if any, to show after the match. - int end; + size_t end; // Check if the next match falls within our snippet window. if (i + 1 < match_positions.size() && IsNextMatchWithinSnippetWindow(bi.get(), match_end, - match_positions[i + 1].first)) { + match_positions[i + 1].first)) { // Yes, it's within the window. Make the end context extend just up // to the next match. end = match_positions[i + 1].first; @@ -257,7 +254,7 @@ void Snippet::ComputeSnippet(const MatchPositions& match_positions, end = match_end; MoveByNGraphemes(bi.get(), kSnippetContext, &end); snippet += UTF8ToWide(document.substr(match_end, end - match_end)); - if (end < document_size) + if (end < document.size()) snippet += kEllipsis; } start = end; diff --git a/chrome/browser/history/snippet.h b/chrome/browser/history/snippet.h index b741c6c..75d75b08 100644 --- a/chrome/browser/history/snippet.h +++ b/chrome/browser/history/snippet.h @@ -13,9 +13,10 @@ class Snippet { public: - // Each pair in MatchPositions is the [begin, end) positions of a match - // within a string. - typedef std::vector > MatchPositions; + // Each MatchPosition is the [begin, end) positions of a match within a + // string. + typedef std::pair MatchPosition; + typedef std::vector MatchPositions; // Parses an offsets string as returned from a sqlite full text index. An // offsets string encodes information about why a row matched a text query. diff --git a/chrome/browser/history/snippet_unittest.cc b/chrome/browser/history/snippet_unittest.cc index 3208cf4..043aa51 100644 --- a/chrome/browser/history/snippet_unittest.cc +++ b/chrome/browser/history/snippet_unittest.cc @@ -79,8 +79,8 @@ const char* kThaiSample = "Google \xE0\xB9\x80\xE0\xB8\x81\xE0\xB9\x87" "\xE0\xB8\xB8\xE0\xB8\x93"; // Comparator for sorting by the first element in a pair. -bool ComparePair1st(const std::pair& a, - const std::pair& b) { +bool ComparePair1st(const Snippet::MatchPosition& a, + const Snippet::MatchPosition& b) { return a.first < b.first; } @@ -106,11 +106,9 @@ std::wstring BuildSnippet(const std::string& document, for (std::vector::iterator qw = query_words.begin(); qw != query_words.end(); ++qw) { // Insert all instances of this word into match_pairs. - std::string::size_type ofs = 0; + size_t ofs = 0; while ((ofs = document_folded.find(*qw, ofs)) != std::string::npos) { - match_positions.push_back( - std::make_pair(static_cast(ofs), - static_cast(ofs + qw->size()))); + match_positions.push_back(std::make_pair(ofs, ofs + qw->size())); ofs += qw->size(); } } @@ -229,7 +227,7 @@ TEST(Snippets, ExtractMatchPositions) { struct TestData { const std::string offsets_string; const size_t expected_match_count; - const int expected_matches[10]; + const size_t expected_matches[10]; } data[] = { { "0 0 1 2 0 0 4 1 0 0 1 5", 1, { 1,6 } }, { "0 0 1 4 0 0 2 1", 1, { 1,5 } }, diff --git a/chrome/browser/history_view.cc b/chrome/browser/history_view.cc index 4cec25f..c382226 100644 --- a/chrome/browser/history_view.cc +++ b/chrome/browser/history_view.cc @@ -122,8 +122,8 @@ class SnippetRenderer : public views::View { int x, int y, Snippet::MatchPositions::const_iterator match_iter, - int start, - int end); + size_t start, + size_t end); DISALLOW_EVIL_CONSTRUCTORS(SnippetRenderer); }; @@ -179,9 +179,12 @@ void SnippetRenderer::Paint(ChromeCanvas* canvas) { } int SnippetRenderer::ProcessRun( - ChromeCanvas* canvas, int x, int y, + ChromeCanvas* canvas, + int x, + int y, Snippet::MatchPositions::const_iterator match_iter, - int start, int end) { + size_t start, + size_t end) { int total_width = 0; while (start < end) { @@ -195,7 +198,7 @@ int SnippetRenderer::ProcessRun( // Determine the next substring to process by examining whether // we're before a match or within a match. ChromeFont* font = &text_font_; - int next = end; + size_t next = end; if (match_iter != snippet_.matches().end()) { if (match_iter->first > start) { // We're in a plain region. -- cgit v1.1