From 16cddbcefd7a364c416ffd352da8767170f29551 Mon Sep 17 00:00:00 2001 From: "battre@chromium.org" Date: Thu, 23 Aug 2012 15:03:26 +0000 Subject: Revert 152992 - Migrate WebRequestRedirectByRegExAction to use RE2 and roll RE2 to revision 97:401ab4168e8e This is a relanding of https://chromiumcodereview.appspot.com/10826120/ after RE2 has been fixed upstream to contain no static initializers. TBR=yoz@chromium.org BUG=112155 Review URL: https://chromiumcodereview.appspot.com/10873029 Reverting due to static initializers. TBR=battre@chromium.org Review URL: https://chromiumcodereview.appspot.com/10883009 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@152993 0039d316-1c4b-4281-b951-d872f2087c98 --- chrome/browser/DEPS | 1 - .../declarative_webrequest/webrequest_action.cc | 42 +- .../api/declarative_webrequest/webrequest_action.h | 18 +- chrome/chrome_browser_extensions.gypi | 1 - third_party/re2/CONTRIBUTORS | 1 - third_party/re2/README.chromium | 12 +- .../re2/patches/fix-implicit-conversion.patch | 2 +- third_party/re2/patches/re2-android.patch | 25 +- third_party/re2/patches/re2-msvc9-chrome.patch | 1140 +++++++++++++++++--- third_party/re2/patches/remove-posix-option.patch | 24 + third_party/re2/patches/remove-valgrind-code.patch | 21 +- third_party/re2/re2/compile.cc | 9 +- third_party/re2/re2/dfa.cc | 94 +- third_party/re2/re2/parse.cc | 16 +- third_party/re2/re2/prefilter.cc | 66 +- third_party/re2/re2/re2.cc | 89 +- third_party/re2/re2/re2.h | 97 +- third_party/re2/re2/regexp.cc | 45 +- third_party/re2/re2/regexp.h | 3 +- third_party/re2/re2/testing/filtered_re2_test.cc | 17 - third_party/re2/re2/testing/parse_test.cc | 75 +- third_party/re2/re2/testing/re2_arg_test.cc | 15 +- third_party/re2/re2/testing/re2_test.cc | 18 +- third_party/re2/re2/testing/set_test.cc | 12 - third_party/re2/util/logging.h | 13 +- third_party/re2/util/mutex.h | 23 +- third_party/re2/util/sparse_array.h | 8 +- third_party/re2/util/sparse_set.h | 10 +- third_party/re2/util/util.h | 2 +- third_party/re2/util/valgrind.cc | 10 +- third_party/re2/util/valgrind.h | 1 - 31 files changed, 1270 insertions(+), 640 deletions(-) create mode 100644 third_party/re2/patches/remove-posix-option.patch diff --git a/chrome/browser/DEPS b/chrome/browser/DEPS index 63969e9..e0a75e7 100644 --- a/chrome/browser/DEPS +++ b/chrome/browser/DEPS @@ -67,7 +67,6 @@ include_rules = [ "+third_party/libevent", # For the remote V8 debugging server "+third_party/libjingle", "+third_party/protobuf/src/google/protobuf", - "+third_party/re2", "+third_party/sqlite", "+third_party/undoview", diff --git a/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.cc b/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.cc index 38eacbf..d005903 100644 --- a/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.cc +++ b/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.cc @@ -10,6 +10,7 @@ #include "base/logging.h" #include "base/stringprintf.h" #include "base/string_util.h" +#include "base/utf_string_conversions.h" #include "base/values.h" #include "chrome/browser/extensions/api/declarative_webrequest/request_stage.h" #include "chrome/browser/extensions/api/declarative_webrequest/webrequest_constants.h" @@ -18,7 +19,6 @@ #include "chrome/browser/extensions/extension_info_map.h" #include "chrome/common/extensions/extension.h" #include "net/url_request/url_request.h" -#include "third_party/re2/re2/re2.h" namespace extensions { @@ -108,18 +108,20 @@ scoped_ptr CreateRedirectRequestByRegExAction( INPUT_FORMAT_VALIDATE(dict->GetString(keys::kFromKey, &from)); INPUT_FORMAT_VALIDATE(dict->GetString(keys::kToKey, &to)); - to = WebRequestRedirectByRegExAction::PerlToRe2Style(to); + // TODO(battre): Add this line once we migrate from ICU RegEx to RE2 RegEx.s + // to = WebRequestRedirectByRegExAction::PerlToRe2Style(to); - RE2::Options options; - options.set_case_sensitive(false); - scoped_ptr from_pattern(new RE2(from, options)); - - if (!from_pattern->ok()) { + UParseError parse_error; + UErrorCode status = U_ZERO_ERROR; + scoped_ptr pattern( + icu::RegexPattern::compile(icu::UnicodeString(from.data(), from.size()), + 0, parse_error, status)); + if (U_FAILURE(status) || !pattern.get()) { *error = "Invalid pattern '" + from + "' -> '" + to + "'"; return scoped_ptr(NULL); } return scoped_ptr( - new WebRequestRedirectByRegExAction(from_pattern.Pass(), to)); + new WebRequestRedirectByRegExAction(pattern.Pass(), to)); } scoped_ptr CreateSetRequestHeaderAction( @@ -577,7 +579,7 @@ WebRequestRedirectToEmptyDocumentAction::CreateDelta( // WebRequestRedirectByRegExAction::WebRequestRedirectByRegExAction( - scoped_ptr from_pattern, + scoped_ptr from_pattern, const std::string& to_pattern) : from_pattern_(from_pattern.Pass()), to_pattern_(to_pattern.data(), to_pattern.size()) {} @@ -652,17 +654,29 @@ LinkedPtrEventResponseDelta WebRequestRedirectByRegExAction::CreateDelta( CHECK(request_data.stage & GetStages()); CHECK(from_pattern_.get()); + UErrorCode status = U_ZERO_ERROR; const std::string& old_url = request_data.request->url().spec(); - std::string new_url = old_url; - if (!RE2::Replace(&new_url, *from_pattern_, to_pattern_) || - new_url == old_url) { + icu::UnicodeString old_url_unicode(old_url.data(), old_url.size()); + + scoped_ptr matcher( + from_pattern_->matcher(old_url_unicode, status)); + if (U_FAILURE(status) || !matcher.get()) + return LinkedPtrEventResponseDelta(NULL); + + icu::UnicodeString new_url = matcher->replaceAll(to_pattern_, status); + if (U_FAILURE(status)) + return LinkedPtrEventResponseDelta(NULL); + + std::string new_url_utf8; + UTF16ToUTF8(new_url.getBuffer(), new_url.length(), &new_url_utf8); + + if (new_url_utf8 == request_data.request->url().spec()) return LinkedPtrEventResponseDelta(NULL); - } LinkedPtrEventResponseDelta result( new extension_web_request_api_helpers::EventResponseDelta( extension_id, extension_install_time)); - result->new_url = GURL(new_url); + result->new_url = GURL(new_url_utf8); return result; } diff --git a/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.h b/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.h index e6a54d7..ecde852 100644 --- a/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.h +++ b/chrome/browser/extensions/api/declarative_webrequest/webrequest_action.h @@ -16,6 +16,7 @@ #include "chrome/browser/extensions/api/web_request/web_request_api_helpers.h" #include "chrome/common/extensions/api/events.h" #include "googleurl/src/gurl.h" +#include "unicode/regex.h" class WebRequestPermission; @@ -37,10 +38,6 @@ namespace net { class URLRequest; } -namespace re2 { -class RE2; -} - namespace extensions { typedef linked_ptr @@ -239,10 +236,11 @@ class WebRequestRedirectToEmptyDocumentAction : public WebRequestAction { // Action that instructs to redirect a network request. class WebRequestRedirectByRegExAction : public WebRequestAction { public: - // The |to_pattern| has to be passed in RE2 syntax with the exception that - // capture groups are referenced in Perl style ($1, $2, ...). - explicit WebRequestRedirectByRegExAction(scoped_ptr from_pattern, - const std::string& to_pattern); + // The |to_pattern| has to be passed in ICU syntax. + // TODO(battre): Change this to Perl style when migrated to RE2. + explicit WebRequestRedirectByRegExAction( + scoped_ptr from_pattern, + const std::string& to_pattern); virtual ~WebRequestRedirectByRegExAction(); // Conversion of capture group styles between Perl style ($1, $2, ...) and @@ -258,8 +256,8 @@ class WebRequestRedirectByRegExAction : public WebRequestAction { const base::Time& extension_install_time) const OVERRIDE; private: - scoped_ptr from_pattern_; - std::string to_pattern_; + scoped_ptr from_pattern_; + icu::UnicodeString to_pattern_; DISALLOW_COPY_AND_ASSIGN(WebRequestRedirectByRegExAction); }; diff --git a/chrome/chrome_browser_extensions.gypi b/chrome/chrome_browser_extensions.gypi index f24b6f6..efdfce0 100644 --- a/chrome/chrome_browser_extensions.gypi +++ b/chrome/chrome_browser_extensions.gypi @@ -38,7 +38,6 @@ '../third_party/icu/icu.gyp:icuuc', '../third_party/leveldatabase/leveldatabase.gyp:leveldatabase', '../third_party/libusb/libusb.gyp:libusb', - '../third_party/re2/re2.gyp:re2', '../ui/base/strings/ui_strings.gyp:ui_strings', '../ui/ui.gyp:ui', '../ui/ui.gyp:ui_resources', diff --git a/third_party/re2/CONTRIBUTORS b/third_party/re2/CONTRIBUTORS index 15053eb5..0b70807 100644 --- a/third_party/re2/CONTRIBUTORS +++ b/third_party/re2/CONTRIBUTORS @@ -27,7 +27,6 @@ # Please keep the list sorted. Brian Gunlogson -Dominic Battré Rob Pike Russ Cox Sanjay Ghemawat diff --git a/third_party/re2/README.chromium b/third_party/re2/README.chromium index abc79fd..1e315e1 100644 --- a/third_party/re2/README.chromium +++ b/third_party/re2/README.chromium @@ -1,9 +1,9 @@ Name: re2 - an efficient, principled regular expression library Short Name: re2 URL: http://code.google.com/p/re2/ -Version: 401ab4168e8e +Version: 83:c79416ca4228 Date: 2012-06-20 -Revision: 97:401ab4168e8e +Revision: 83:c79416ca4228 License: BSD 3-Clause License License File: LICENSE Security Critical: yes @@ -13,13 +13,13 @@ RE2 is a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python. Local Modifications (to be applied in this order): -- Rename POSIX configuration (patches/rename-posix-option.patch) +- Dropped POSIX configuration (patches/remove-posix-option.patch) +- Support for Windows (patches/re2-msvc9-chrome.patch) - Remove valgrind specific code that exists in chromium already (patches/remove-valgrind-code.patch) - Fix an implicit conversion from NULL to false (patches/fix-implicit-conversion.patch) -- Support for Windows (patches/re2-msvc9-chrome.patch) -- Support Android (patches/re2-android.patch) +- Support Android (re2-android.patch) - Remove testinstall.cc because it lacks a license header and remove executable bit from non-executable .py file. - (patches/remove-testinstall.cc-and-fix-filepermissions.patch) + (remove-testinstall.cc-and-fix-filepermissions.patch) diff --git a/third_party/re2/patches/fix-implicit-conversion.patch b/third_party/re2/patches/fix-implicit-conversion.patch index bc68f11..ab6c2f0 100644 --- a/third_party/re2/patches/fix-implicit-conversion.patch +++ b/third_party/re2/patches/fix-implicit-conversion.patch @@ -1,7 +1,7 @@ diff -r e12d4aa8907f re2/dfa.cc --- a/re2/dfa.cc Wed Jul 25 15:13:19 2012 +0200 +++ b/re2/dfa.cc Wed Jul 25 15:13:47 2012 +0200 -@@ -1778,7 +1780,7 @@ +@@ -1736,7 +1736,7 @@ return false; } if (params.start == DeadState) diff --git a/third_party/re2/patches/re2-android.patch b/third_party/re2/patches/re2-android.patch index bcaea8d..a622035 100644 --- a/third_party/re2/patches/re2-android.patch +++ b/third_party/re2/patches/re2-android.patch @@ -1,7 +1,6 @@ -diff --git a/third_party/re2/re2/parse.cc b/third_party/re2/re2/parse.cc -index 0cf4ab4..6423fe9 100644 ---- a/third_party/re2/re2/parse.cc -+++ b/third_party/re2/re2/parse.cc +diff -r cae1910ce3c5 re2/parse.cc +--- a/re2/parse.cc Mon Jul 30 16:12:46 2012 +0200 ++++ b/re2/parse.cc Tue Jul 31 14:12:31 2012 +0200 @@ -16,6 +16,8 @@ // and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W. // See regexp.h for rationale. @@ -11,10 +10,9 @@ index 0cf4ab4..6423fe9 100644 #include "util/util.h" #include "re2/regexp.h" #include "re2/stringpiece.h" -diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc -index 989add6..78978f1 100644 ---- a/third_party/re2/re2/re2.cc -+++ b/third_party/re2/re2/re2.cc +diff -r cae1910ce3c5 re2/re2.cc +--- a/re2/re2.cc Mon Jul 30 16:12:46 2012 +0200 ++++ b/re2/re2.cc Tue Jul 31 14:12:31 2012 +0200 @@ -9,6 +9,8 @@ #include "re2/re2.h" @@ -24,10 +22,9 @@ index 989add6..78978f1 100644 #include #include #ifdef WIN32 -diff --git a/third_party/re2/util/util.h b/third_party/re2/util/util.h -index dab7e16..11b5f4a 100644 ---- a/third_party/re2/util/util.h -+++ b/third_party/re2/util/util.h +diff -r cae1910ce3c5 util/util.h +--- a/util/util.h Mon Jul 30 16:12:46 2012 +0200 ++++ b/util/util.h Tue Jul 31 14:12:31 2012 +0200 @@ -28,6 +28,7 @@ #include #include @@ -36,7 +33,7 @@ index dab7e16..11b5f4a 100644 #include "base/third_party/dynamic_annotations/dynamic_annotations.h" // Use std names. -@@ -44,7 +45,7 @@ using std::sort; +@@ -44,7 +45,7 @@ using std::swap; using std::make_pair; @@ -45,7 +42,7 @@ index dab7e16..11b5f4a 100644 #include using std::tr1::unordered_set; -@@ -52,7 +53,7 @@ using std::tr1::unordered_set; +@@ -52,7 +53,7 @@ #else #include diff --git a/third_party/re2/patches/re2-msvc9-chrome.patch b/third_party/re2/patches/re2-msvc9-chrome.patch index 17da0cd..2d9ed28 100644 --- a/third_party/re2/patches/re2-msvc9-chrome.patch +++ b/third_party/re2/patches/re2-msvc9-chrome.patch @@ -1,7 +1,6 @@ -diff --git a/third_party/re2/AUTHORS b/third_party/re2/AUTHORS -index 3c0f928..e17d9bf 100644 ---- a/third_party/re2/AUTHORS -+++ b/third_party/re2/AUTHORS +diff -r c79416ca4228 AUTHORS +--- a/AUTHORS Tue May 29 11:50:48 2012 -0400 ++++ b/AUTHORS Wed Jun 20 19:00:08 2012 +0200 @@ -8,5 +8,6 @@ # Please keep the list sorted. @@ -9,23 +8,20 @@ index 3c0f928..e17d9bf 100644 +Brian Gunlogson Google Inc. Stefano Rivera -diff --git a/third_party/re2/CONTRIBUTORS b/third_party/re2/CONTRIBUTORS -index ac64332..15053eb 100644 ---- a/third_party/re2/CONTRIBUTORS -+++ b/third_party/re2/CONTRIBUTORS +diff -r c79416ca4228 CONTRIBUTORS +--- a/CONTRIBUTORS Tue May 29 11:50:48 2012 -0400 ++++ b/CONTRIBUTORS Wed Jun 20 19:00:08 2012 +0200 @@ -26,6 +26,7 @@ # Please keep the list sorted. +Brian Gunlogson - Dominic Battré Rob Pike Russ Cox -diff --git a/third_party/re2/mswin/stdint.h b/third_party/re2/mswin/stdint.h -new file mode 100644 -index 0000000..d02608a ---- /dev/null -+++ b/third_party/re2/mswin/stdint.h + Sanjay Ghemawat +diff -r c79416ca4228 mswin/stdint.h +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/mswin/stdint.h Wed Jun 20 19:00:08 2012 +0200 @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 @@ -274,11 +270,383 @@ index 0000000..d02608a + + +#endif // _MSC_STDINT_H_ ] -diff --git a/third_party/re2/re2/compile.cc b/third_party/re2/re2/compile.cc -index 9cddb71..adb45fd 100644 ---- a/third_party/re2/re2/compile.cc -+++ b/third_party/re2/re2/compile.cc -@@ -502,7 +502,7 @@ int Compiler::RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next) { +diff -r c79416ca4228 re2.sln +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/re2.sln Wed Jun 20 19:00:08 2012 +0200 +@@ -0,0 +1,38 @@ ++ ++Microsoft Visual Studio Solution File, Format Version 10.00 ++# Visual Studio 2008 ++Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "re2", "re2.vcproj", "{494BD4B2-1ADD-4053-981D-BA14D6DF9219}" ++ ProjectSection(ProjectDependencies) = postProject ++ {AB36233A-643A-4D2E-93B3-0602DA52C8D5} = {AB36233A-643A-4D2E-93B3-0602DA52C8D5} ++ EndProjectSection ++EndProject ++Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "re2_testing", "re2_testing\re2_testing.vcproj", "{1B9A5974-DA06-4F57-BFFC-4DE19B968AE8}" ++ ProjectSection(ProjectDependencies) = postProject ++ {494BD4B2-1ADD-4053-981D-BA14D6DF9219} = {494BD4B2-1ADD-4053-981D-BA14D6DF9219} ++ EndProjectSection ++EndProject ++Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "util", "util\util.vcproj", "{AB36233A-643A-4D2E-93B3-0602DA52C8D5}" ++EndProject ++Global ++ GlobalSection(SolutionConfigurationPlatforms) = preSolution ++ Debug|Win32 = Debug|Win32 ++ Release|Win32 = Release|Win32 ++ EndGlobalSection ++ GlobalSection(ProjectConfigurationPlatforms) = postSolution ++ {494BD4B2-1ADD-4053-981D-BA14D6DF9219}.Debug|Win32.ActiveCfg = Debug|Win32 ++ {494BD4B2-1ADD-4053-981D-BA14D6DF9219}.Debug|Win32.Build.0 = Debug|Win32 ++ {494BD4B2-1ADD-4053-981D-BA14D6DF9219}.Release|Win32.ActiveCfg = Release|Win32 ++ {494BD4B2-1ADD-4053-981D-BA14D6DF9219}.Release|Win32.Build.0 = Release|Win32 ++ {1B9A5974-DA06-4F57-BFFC-4DE19B968AE8}.Debug|Win32.ActiveCfg = Debug|Win32 ++ {1B9A5974-DA06-4F57-BFFC-4DE19B968AE8}.Debug|Win32.Build.0 = Debug|Win32 ++ {1B9A5974-DA06-4F57-BFFC-4DE19B968AE8}.Release|Win32.ActiveCfg = Release|Win32 ++ {1B9A5974-DA06-4F57-BFFC-4DE19B968AE8}.Release|Win32.Build.0 = Release|Win32 ++ {AB36233A-643A-4D2E-93B3-0602DA52C8D5}.Debug|Win32.ActiveCfg = Debug|Win32 ++ {AB36233A-643A-4D2E-93B3-0602DA52C8D5}.Debug|Win32.Build.0 = Debug|Win32 ++ {AB36233A-643A-4D2E-93B3-0602DA52C8D5}.Release|Win32.ActiveCfg = Release|Win32 ++ {AB36233A-643A-4D2E-93B3-0602DA52C8D5}.Release|Win32.Build.0 = Release|Win32 ++ EndGlobalSection ++ GlobalSection(SolutionProperties) = preSolution ++ HideSolutionNode = FALSE ++ EndGlobalSection ++EndGlobal +diff -r c79416ca4228 re2.vcproj +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/re2.vcproj Wed Jun 20 19:00:08 2012 +0200 +@@ -0,0 +1,327 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff -r c79416ca4228 re2/compile.cc +--- a/re2/compile.cc Tue May 29 11:50:48 2012 -0400 ++++ b/re2/compile.cc Wed Jun 20 19:00:08 2012 +0200 +@@ -501,7 +501,7 @@ return UncachedRuneByteSuffix(lo, hi, foldcase, next); } @@ -287,10 +655,9 @@ index 9cddb71..adb45fd 100644 map::iterator it = rune_cache_.find(key); if (it != rune_cache_.end()) return it->second; -diff --git a/third_party/re2/re2/prefilter_tree.cc b/third_party/re2/re2/prefilter_tree.cc -index d8bc37a..cdcf77e 100644 ---- a/third_party/re2/re2/prefilter_tree.cc -+++ b/third_party/re2/re2/prefilter_tree.cc +diff -r c79416ca4228 re2/prefilter_tree.cc +--- a/re2/prefilter_tree.cc Tue May 29 11:50:48 2012 -0400 ++++ b/re2/prefilter_tree.cc Wed Jun 20 19:00:08 2012 +0200 @@ -8,6 +8,11 @@ #include "re2/prefilter_tree.h" #include "re2/re2.h" @@ -303,10 +670,9 @@ index d8bc37a..cdcf77e 100644 DEFINE_int32(filtered_re2_min_atom_len, 3, "Strings less than this length are not stored as atoms"); -diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc -index 3cc3dd4..989add6 100644 ---- a/third_party/re2/re2/re2.cc -+++ b/third_party/re2/re2/re2.cc +diff -r c79416ca4228 re2/re2.cc +--- a/re2/re2.cc Tue May 29 11:50:48 2012 -0400 ++++ b/re2/re2.cc Wed Jun 20 19:00:08 2012 +0200 @@ -11,7 +11,13 @@ #include @@ -321,73 +687,40 @@ index 3cc3dd4..989add6 100644 #include #include "util/util.h" #include "util/flags.h" -@@ -31,10 +37,22 @@ const VariadicFunction2 RE2::Consume; const VariadicFunction2 RE2::FindAndConsume; --// This will trigger LNK2005 error in MSVC. --#ifndef COMPILER_MSVC -const int RE2::Options::kDefaultMaxMem; // initialized in re2.h --#endif // COMPILER_MSVC +- + // Commonly-used option sets; arguments to constructor are: + // utf8 input + // posix syntax +diff -r c79416ca4228 re2/re2.h +--- a/re2/re2.h Tue May 29 11:50:48 2012 -0400 ++++ b/re2/re2.h Wed Jun 20 19:00:08 2012 +0200 +@@ -5,6 +5,8 @@ + #ifndef RE2_RE2_H + #define RE2_RE2_H + +#define kDefaultMaxMem (8<<20) + -+RE2::Options::Options() -+ : encoding_(EncodingUTF8), -+ posix_syntax_(false), -+ longest_match_(false), -+ log_errors_(true), -+ max_mem_(kDefaultMaxMem), -+ literal_(false), -+ never_nl_(false), -+ never_capture_(false), -+ case_sensitive_(true), -+ perl_classes_(false), -+ word_boundary_(false), -+ one_line_(false) { -+} - - RE2::Options::Options(RE2::CannedOptions opt) - : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), -diff --git a/third_party/re2/re2/re2.h b/third_party/re2/re2/re2.h -index 9f5b66d..98b06b8 100644 ---- a/third_party/re2/re2/re2.h -+++ b/third_party/re2/re2/re2.h -@@ -552,28 +552,16 @@ class RE2 { + // C++ interface to the re2 regular-expression library. + // RE2 supports Perl-style regular expressions (with extensions like + // \d, \w, \s, ...). +@@ -517,9 +519,6 @@ + // Once a DFA fills its budget, it flushes its cache and starts over. // If this happens too often, RE2 falls back on the NFA implementation. - // For now, make the default budget something close to Code Search. -+#ifndef WIN32 - static const int kDefaultMaxMem = 8<<20; -+#endif - +- // For now, make the default budget something close to Code Search. +- static const int kDefaultMaxMem = 8<<20; +- enum Encoding { EncodingUTF8 = 1, EncodingLatin1 - }; - -- Options() : -- encoding_(EncodingUTF8), -- posix_syntax_(false), -- longest_match_(false), -- log_errors_(true), -- max_mem_(kDefaultMaxMem), -- literal_(false), -- never_nl_(false), -- never_capture_(false), -- case_sensitive_(true), -- perl_classes_(false), -- word_boundary_(false), -- one_line_(false) { -- } -- -+ Options(); - /*implicit*/ Options(CannedOptions); - - Encoding encoding() const { return encoding_; } -diff --git a/third_party/re2/re2/stringpiece.h b/third_party/re2/re2/stringpiece.h -index ab9297c..38a5150 100644 ---- a/third_party/re2/re2/stringpiece.h -+++ b/third_party/re2/re2/stringpiece.h +diff -r c79416ca4228 re2/stringpiece.h +--- a/re2/stringpiece.h Tue May 29 11:50:48 2012 -0400 ++++ b/re2/stringpiece.h Wed Jun 20 19:00:08 2012 +0200 @@ -23,6 +23,9 @@ #include #include @@ -398,10 +731,9 @@ index ab9297c..38a5150 100644 namespace re2 { -diff --git a/third_party/re2/re2/testing/re2_test.cc b/third_party/re2/re2/testing/re2_test.cc -index b99cacf..911e868 100644 ---- a/third_party/re2/re2/testing/re2_test.cc -+++ b/third_party/re2/re2/testing/re2_test.cc +diff -r c79416ca4228 re2/testing/re2_test.cc +--- a/re2/testing/re2_test.cc Tue May 29 11:50:48 2012 -0400 ++++ b/re2/testing/re2_test.cc Wed Jun 20 19:00:08 2012 +0200 @@ -6,7 +6,9 @@ // TODO: Test extractions for PartialMatch/Consume @@ -424,7 +756,7 @@ index b99cacf..911e868 100644 DECLARE_bool(logtostderr); namespace re2 { -@@ -657,6 +664,7 @@ TEST(RE2, FullMatchTypedNullArg) { +@@ -657,6 +664,7 @@ CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); } @@ -432,7 +764,7 @@ index b99cacf..911e868 100644 // Check that numeric parsing code does not read past the end of // the number being parsed. TEST(RE2, NULTerminated) { -@@ -678,6 +686,7 @@ TEST(RE2, NULTerminated) { +@@ -678,6 +686,7 @@ CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); CHECK_EQ(x, 1); } @@ -440,10 +772,311 @@ index b99cacf..911e868 100644 TEST(RE2, FullMatchTypeTests) { // Type tests -diff --git a/third_party/re2/util/logging.h b/third_party/re2/util/logging.h -index 734e7a1..7302ea6 100644 ---- a/third_party/re2/util/logging.h -+++ b/third_party/re2/util/logging.h +diff -r c79416ca4228 re2_testing/re2_testing.vcproj +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/re2_testing/re2_testing.vcproj Wed Jun 20 19:00:08 2012 +0200 +@@ -0,0 +1,298 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff -r c79416ca4228 util/logging.h +--- a/util/logging.h Tue May 29 11:50:48 2012 -0400 ++++ b/util/logging.h Wed Jun 20 19:00:08 2012 +0200 @@ -7,8 +7,13 @@ #ifndef RE2_UTIL_LOGGING_H__ #define RE2_UTIL_LOGGING_H__ @@ -458,10 +1091,9 @@ index 734e7a1..7302ea6 100644 // Debug-only checking. #define DCHECK(condition) assert(condition) -diff --git a/third_party/re2/util/mutex.h b/third_party/re2/util/mutex.h -index 9787bfb..e321fae 100644 ---- a/third_party/re2/util/mutex.h -+++ b/third_party/re2/util/mutex.h +diff -r c79416ca4228 util/mutex.h +--- a/util/mutex.h Tue May 29 11:50:48 2012 -0400 ++++ b/util/mutex.h Wed Jun 20 19:00:08 2012 +0200 @@ -12,8 +12,10 @@ namespace re2 { @@ -473,7 +1105,7 @@ index 9787bfb..e321fae 100644 #if defined(NO_THREADS) typedef int MutexType; // to keep a lock-count -@@ -32,7 +34,9 @@ namespace re2 { +@@ -32,7 +34,9 @@ # include typedef pthread_mutex_t MutexType; #elif defined(WIN32) @@ -484,10 +1116,9 @@ index 9787bfb..e321fae 100644 # ifdef GMUTEX_TRYLOCK // We need Windows NT or later for TryEnterCriticalSection(). If you // don't need that functionality, you can remove these _WIN32_WINNT -diff --git a/third_party/re2/util/pcre.cc b/third_party/re2/util/pcre.cc -index 5e67e1f..1602133 100644 ---- a/third_party/re2/util/pcre.cc -+++ b/third_party/re2/util/pcre.cc +diff -r c79416ca4228 util/pcre.cc +--- a/util/pcre.cc Tue May 29 11:50:48 2012 -0400 ++++ b/util/pcre.cc Wed Jun 20 19:00:08 2012 +0200 @@ -11,6 +11,11 @@ #include "util/flags.h" #include "util/pcre.h" @@ -500,11 +1131,10 @@ index 5e67e1f..1602133 100644 #define PCREPORT(level) LOG(level) // Default PCRE limits. -diff --git a/third_party/re2/util/pcre.h b/third_party/re2/util/pcre.h -index 4dda95d..771ac91 100644 ---- a/third_party/re2/util/pcre.h -+++ b/third_party/re2/util/pcre.h -@@ -180,9 +180,15 @@ struct pcre_extra { int flags, match_limit, match_limit_recursion; }; +diff -r c79416ca4228 util/pcre.h +--- a/util/pcre.h Tue May 29 11:50:48 2012 -0400 ++++ b/util/pcre.h Wed Jun 20 19:00:08 2012 +0200 +@@ -180,9 +180,15 @@ #define PCRE_ERROR_MATCHLIMIT 2 #define PCRE_ERROR_RECURSIONLIMIT 3 #define PCRE_INFO_CAPTURECOUNT 0 @@ -520,10 +1150,9 @@ index 4dda95d..771ac91 100644 } // namespace re2 #endif -diff --git a/third_party/re2/util/stringprintf.cc b/third_party/re2/util/stringprintf.cc -index c908181..d4691d1 100644 ---- a/third_party/re2/util/stringprintf.cc -+++ b/third_party/re2/util/stringprintf.cc +diff -r c79416ca4228 util/stringprintf.cc +--- a/util/stringprintf.cc Tue May 29 11:50:48 2012 -0400 ++++ b/util/stringprintf.cc Wed Jun 20 19:00:08 2012 +0200 @@ -4,6 +4,10 @@ #include "util/util.h" @@ -535,10 +1164,9 @@ index c908181..d4691d1 100644 namespace re2 { static void StringAppendV(string* dst, const char* format, va_list ap) { -diff --git a/third_party/re2/util/test.cc b/third_party/re2/util/test.cc -index 0644829..2fe1bfa 100644 ---- a/third_party/re2/util/test.cc -+++ b/third_party/re2/util/test.cc +diff -r c79416ca4228 util/test.cc +--- a/util/test.cc Tue May 29 11:50:48 2012 -0400 ++++ b/util/test.cc Wed Jun 20 19:00:08 2012 +0200 @@ -3,7 +3,9 @@ // license that can be found in the LICENSE file. @@ -549,7 +1177,7 @@ index 0644829..2fe1bfa 100644 #include "util/test.h" DEFINE_string(test_tmpdir, "/var/tmp", "temp directory"); -@@ -23,9 +25,13 @@ void RegisterTest(void (*fn)(void), const char *name) { +@@ -23,9 +25,13 @@ namespace re2 { int64 VirtualProcessSize() { @@ -563,10 +1191,9 @@ index 0644829..2fe1bfa 100644 } } // namespace re2 -diff --git a/third_party/re2/util/util.h b/third_party/re2/util/util.h -index 63a9c6f..dab7e16 100644 ---- a/third_party/re2/util/util.h -+++ b/third_party/re2/util/util.h +diff -r c79416ca4228 util/util.h +--- a/util/util.h Tue May 29 11:50:48 2012 -0400 ++++ b/util/util.h Wed Jun 20 19:00:08 2012 +0200 @@ -12,7 +12,9 @@ #include // For size_t #include @@ -577,7 +1204,7 @@ index 63a9c6f..dab7e16 100644 #include // C++ -@@ -50,7 +52,11 @@ using std::tr1::unordered_set; +@@ -48,7 +50,11 @@ #else #include @@ -589,19 +1216,276 @@ index 63a9c6f..dab7e16 100644 #endif -diff --git a/third_party/re2/util/valgrind.h b/third_party/re2/util/valgrind.h -index ca10b1a..d097b0c 100644 ---- a/third_party/re2/util/valgrind.h -+++ b/third_party/re2/util/valgrind.h -@@ -4064,6 +4064,7 @@ typedef - #endif /* PLAT_ppc64_aix5 */ +diff -r c79416ca4228 util/util.vcproj +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/util/util.vcproj Wed Jun 20 19:00:08 2012 +0200 +@@ -0,0 +1,253 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff -r c79416ca4228 util/valgrind.h +--- a/util/valgrind.h Tue May 29 11:50:48 2012 -0400 ++++ b/util/valgrind.h Wed Jun 20 19:00:08 2012 +0200 +@@ -4063,7 +4063,7 @@ + #endif /* PLAT_ppc64_aix5 */ +- +#ifndef WIN32 /* ------------------------------------------------------------------ */ /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ /* */ -@@ -4170,7 +4171,7 @@ typedef +@@ -4170,7 +4170,7 @@ VG_USERREQ__DISCARD_TRANSLATIONS, \ _qzz_addr, _qzz_len, 0, 0, 0); \ } diff --git a/third_party/re2/patches/remove-posix-option.patch b/third_party/re2/patches/remove-posix-option.patch new file mode 100644 index 0000000..ba55172 --- /dev/null +++ b/third_party/re2/patches/remove-posix-option.patch @@ -0,0 +1,24 @@ +diff -r 2d252384c5e8 re2/re2.cc +--- a/re2/re2.cc Mon Mar 05 14:20:36 2012 -0500 ++++ b/re2/re2.cc Wed Jun 20 20:32:24 2012 +0200 +@@ -40,7 +40,7 @@ + // log errors + const RE2::Options RE2::DefaultOptions; // EncodingUTF8, false, false, true + const RE2::Options RE2::Latin1(RE2::Options::EncodingLatin1, false, false, true); +-const RE2::Options RE2::POSIX(RE2::Options::EncodingUTF8, true, true, true); ++//const RE2::Options RE2::POSIX(RE2::Options::EncodingUTF8, true, true, true); + const RE2::Options RE2::Quiet(RE2::Options::EncodingUTF8, false, false, false); + + // If a regular expression has no error, its error_ field points here +diff -r 2d252384c5e8 re2/re2.h +--- a/re2/re2.h Mon Mar 05 14:20:36 2012 -0500 ++++ b/re2/re2.h Wed Jun 20 20:32:24 2012 +0200 +@@ -233,7 +233,7 @@ + // RE2 constructor. + static const Options DefaultOptions; + static const Options Latin1; // treat input as Latin-1 (default UTF-8) +- static const Options POSIX; // POSIX syntax, leftmost-longest match ++ //static const Options POSIX; // POSIX syntax, leftmost-longest match + static const Options Quiet; // do not log about regexp parse errors + + // Need to have the const char* and const string& forms for implicit diff --git a/third_party/re2/patches/remove-valgrind-code.patch b/third_party/re2/patches/remove-valgrind-code.patch index 1d8884a..20fea48 100644 --- a/third_party/re2/patches/remove-valgrind-code.patch +++ b/third_party/re2/patches/remove-valgrind-code.patch @@ -1,7 +1,6 @@ -diff --git a/third_party/re2/re2/dfa.cc b/third_party/re2/re2/dfa.cc -index 3a6a387..32c8c33 100644 ---- a/third_party/re2/re2/dfa.cc -+++ b/third_party/re2/re2/dfa.cc +diff -r ceab86d470c1 re2/dfa.cc +--- a/re2/dfa.cc Tue Jul 03 16:43:11 2012 +0200 ++++ b/re2/dfa.cc Tue Jul 03 17:03:00 2012 +0200 @@ -27,6 +27,8 @@ #include "util/flags.h" #include "util/sparse_set.h" @@ -11,11 +10,10 @@ index 3a6a387..32c8c33 100644 DEFINE_bool(re2_dfa_bail_when_slow, true, "Whether the RE2 DFA should bail out early " "if the NFA would be faster (for testing)."); -diff --git a/third_party/re2/util/util.h b/third_party/re2/util/util.h -index a43ff76..63a9c6f 100644 ---- a/third_party/re2/util/util.h -+++ b/third_party/re2/util/util.h -@@ -26,6 +26,8 @@ +diff -r ceab86d470c1 util/util.h +--- a/util/util.h Tue Jul 03 16:43:11 2012 +0200 ++++ b/util/util.h Tue Jul 03 17:03:00 2012 +0200 +@@ -28,6 +28,8 @@ #include #include @@ -24,7 +22,7 @@ index a43ff76..63a9c6f 100644 // Use std names. using std::set; using std::pair; -@@ -80,17 +82,6 @@ template struct CompileAssert {}; +@@ -86,16 +88,6 @@ #define arraysize(array) (sizeof(array)/sizeof((array)[0])) @@ -37,12 +35,11 @@ index a43ff76..63a9c6f 100644 -#define NO_THREAD_SAFETY_ANALYSIS -#define ANNOTATE_HAPPENS_BEFORE(x) -#define ANNOTATE_HAPPENS_AFTER(x) --#define ANNOTATE_UNPROTECTED_READ(x) (x) - class StringPiece; string CEscape(const StringPiece& src); -@@ -116,8 +107,6 @@ static inline uint64 Hash64StringWithSeed(const char* s, int len, uint32 seed) { +@@ -121,8 +113,6 @@ return ((uint64)x << 32) | y; } diff --git a/third_party/re2/re2/compile.cc b/third_party/re2/re2/compile.cc index adb45fd..0c96d33 100644 --- a/third_party/re2/re2/compile.cc +++ b/third_party/re2/re2/compile.cc @@ -44,7 +44,7 @@ struct PatchList { static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2); }; -static PatchList nullPatchList; +static PatchList nullPatchList = { 0 }; // Returns patch list containing just p. PatchList PatchList::Mk(uint32 p) { @@ -106,12 +106,11 @@ struct Frag { uint32 begin; PatchList end; - explicit Frag(LinkerInitialized) {} Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector Frag(uint32 begin, PatchList end) : begin(begin), end(end) {} }; -static Frag kNullFrag(LINKER_INITIALIZED); +static Frag kNullFrag; // Input encodings. enum Encoding { @@ -589,7 +588,7 @@ static struct ByteRangeProg { }; void Compiler::Add_80_10ffff() { - int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning + int inst[arraysize(prog_80_10ffff)]; for (int i = 0; i < arraysize(prog_80_10ffff); i++) { const ByteRangeProg& p = prog_80_10ffff[i]; int next = 0; @@ -733,7 +732,7 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, Frag f = Match(re->match_id()); // Remember unanchored match to end of string. if (anchor_ != RE2::ANCHOR_BOTH) - f = Cat(DotStar(), Cat(EmptyWidth(kEmptyEndText), f)); + f = Cat(DotStar(), f); return f; } diff --git a/third_party/re2/re2/dfa.cc b/third_party/re2/re2/dfa.cc index 32c8c33..344ef41 100644 --- a/third_party/re2/re2/dfa.cc +++ b/third_party/re2/re2/dfa.cc @@ -117,7 +117,6 @@ class DFA { kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left }; -#ifndef STL_MSVC // STL function structures for use with unordered_set. struct StateEqual { bool operator()(const State* a, const State* b) const { @@ -135,7 +134,6 @@ class DFA { return true; // they're equal } }; -#endif // STL_MSVC struct StateHash { size_t operator()(const State* a) const { if (a == NULL) @@ -147,34 +145,9 @@ class DFA { else return Hash64StringWithSeed(s, len, a->flag_); } -#ifdef STL_MSVC - // Less than operator. - bool operator()(const State* a, const State* b) const { - if (a == b) - return false; - if (a == NULL || b == NULL) - return a == NULL; - if (a->ninst_ != b->ninst_) - return a->ninst_ < b->ninst_; - if (a->flag_ != b->flag_) - return a->flag_ < b->flag_; - for (int i = 0; i < a->ninst_; ++i) - if (a->inst_[i] != b->inst_[i]) - return a->inst_[i] < b->inst_[i]; - return false; // they're equal - } - // The two public members are required by msvc. 4 and 8 are default values. - // Reference: http://msdn.microsoft.com/en-us/library/1s1byw77.aspx - static const size_t bucket_size = 4; - static const size_t min_buckets = 8; -#endif // STL_MSVC }; -#ifdef STL_MSVC - typedef unordered_set StateSet; -#else // !STL_MSVC typedef unordered_set StateSet; -#endif // STL_MSVC private: @@ -991,10 +964,8 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // If someone else already computed this, return it. MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering - State* ns = state->next_[ByteMap(c)]; - ANNOTATE_HAPPENS_AFTER(ns); - if (ns != NULL) - return ns; + if (state->next_[ByteMap(c)]) + return state->next_[ByteMap(c)]; // Convert state into Workq. StateToWorkq(state, q0_); @@ -1037,17 +1008,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { } bool ismatch = false; RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_, start_unanchored_); - - // Most of the time, we build the state from the output of - // RunWorkqOnByte, so swap q0_ and q1_ here. However, so that - // RE2::Set can tell exactly which match instructions - // contributed to the match, don't swap if c is kByteEndText. - // The resulting state wouldn't be correct for further processing - // of the string, but we're at the end of the text so that's okay. - // Leaving q0_ alone preseves the match instructions that led to - // the current setting of ismatch. - if (c != kByteEndText || kind_ != Prog::kManyMatch) - swap(q0_, q1_); + swap(q0_, q1_); // Save afterflag along with ismatch and isword in new state. uint flag = afterflag; @@ -1056,7 +1017,7 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { if (isword) flag |= kFlagLastWord; - ns = WorkqToCachedState(q0_, flag); + State* ns = WorkqToCachedState(q0_, flag); // Write barrier before updating state->next_ so that the // main search loop can proceed without any locking, for speed. @@ -1065,9 +1026,9 @@ DFA::State* DFA::RunStateOnByte(State* state, int c) { // a) the access to next_ should be ignored, // b) 'ns' is properly published. WriteMemoryBarrier(); // Flush ns before linking to it. + ANNOTATE_PUBLISH_MEMORY_RANGE(ns, sizeof(*ns)); ANNOTATE_IGNORE_WRITES_BEGIN(); - ANNOTATE_HAPPENS_BEFORE(ns); state->next_[ByteMap(c)] = ns; ANNOTATE_IGNORE_WRITES_END(); return ns; @@ -1392,7 +1353,6 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering State* ns = s->next_[bytemap[c]]; - ANNOTATE_HAPPENS_AFTER(ns); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, c); if (ns == NULL) { @@ -1464,6 +1424,20 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, } } + // Peek in state to see if a match is coming up. + if (params->matches && kind_ == Prog::kManyMatch) { + vector* v = params->matches; + v->clear(); + if (s > SpecialStateMax) { + for (int i = 0; i < s->ninst_; i++) { + Prog::Inst* ip = prog_->inst(s->inst_[i]); + if (ip->opcode() == kInstMatch) + v->push_back(ip->match_id()); + } + } + } + + // Process one more byte to see if it triggers a match. // (Remember, matches are delayed one byte.) int lastbyte; @@ -1481,7 +1455,6 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, MaybeReadMemoryBarrier(); // On alpha we need to ensure read ordering State* ns = s->next_[ByteMap(lastbyte)]; - ANNOTATE_HAPPENS_AFTER(ns); if (ns == NULL) { ns = RunStateOnByteUnlocked(s, lastbyte); if (ns == NULL) { @@ -1509,15 +1482,6 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params, if (s > SpecialStateMax && s->IsMatch()) { matched = true; lastmatch = p; - if (params->matches && kind_ == Prog::kManyMatch) { - vector* v = params->matches; - v->clear(); - for (int i = 0; i < s->ninst_; i++) { - Prog::Inst* ip = prog_->inst(s->inst_[i]); - if (ip->opcode() == kInstMatch) - v->push_back(ip->match_id()); - } - } if (DebugDFA) fprintf(stderr, "match @%d! [%s]\n", static_cast(lastmatch - bp), DumpState(s).c_str()); @@ -1675,7 +1639,7 @@ bool DFA::AnalyzeSearch(SearchParams* params) { DumpState(info->start).c_str(), info->firstbyte); params->start = info->start; - params->firstbyte = ANNOTATE_UNPROTECTED_READ(info->firstbyte); + params->firstbyte = info->firstbyte; return true; } @@ -1684,16 +1648,12 @@ bool DFA::AnalyzeSearch(SearchParams* params) { bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint flags) { // Quick check; okay because of memory barriers below. - if (ANNOTATE_UNPROTECTED_READ(info->firstbyte) != kFbUnknown) { - ANNOTATE_HAPPENS_AFTER(&info->firstbyte); + if (info->firstbyte != kFbUnknown) return true; - } MutexLock l(&mutex_); - if (info->firstbyte != kFbUnknown) { - ANNOTATE_HAPPENS_AFTER(&info->firstbyte); + if (info->firstbyte != kFbUnknown) return true; - } q0_->clear(); AddToQueue(q0_, @@ -1704,14 +1664,12 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, return false; if (info->start == DeadState) { - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); WriteMemoryBarrier(); // Synchronize with "quick check" above. info->firstbyte = kFbNone; return true; } if (info->start == FullMatchState) { - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); WriteMemoryBarrier(); // Synchronize with "quick check" above. info->firstbyte = kFbNone; // will be ignored return true; @@ -1724,7 +1682,6 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, for (int i = 0; i < 256; i++) { State* s = RunStateOnByte(info->start, i); if (s == NULL) { - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); WriteMemoryBarrier(); // Synchronize with "quick check" above. info->firstbyte = firstbyte; return false; @@ -1739,7 +1696,6 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info, break; } } - ANNOTATE_HAPPENS_BEFORE(&info->firstbyte); WriteMemoryBarrier(); // Synchronize with "quick check" above. info->firstbyte = firstbyte; return true; @@ -1822,7 +1778,7 @@ DFA* Prog::GetDFA(MatchKind kind) { } // Quick check; okay because of memory barrier below. - DFA *dfa = ANNOTATE_UNPROTECTED_READ(*pdfa); + DFA *dfa = *pdfa; if (dfa != NULL) { ANNOTATE_HAPPENS_AFTER(dfa); return dfa; @@ -1830,10 +1786,8 @@ DFA* Prog::GetDFA(MatchKind kind) { MutexLock l(&dfa_mutex_); dfa = *pdfa; - if (dfa != NULL) { - ANNOTATE_HAPPENS_AFTER(dfa); + if (dfa != NULL) return dfa; - } // For a forward DFA, half the memory goes to each DFA. // For a reverse DFA, all the memory goes to the diff --git a/third_party/re2/re2/parse.cc b/third_party/re2/re2/parse.cc index 6423fe9..551555a 100644 --- a/third_party/re2/re2/parse.cc +++ b/third_party/re2/re2/parse.cc @@ -1455,13 +1455,6 @@ static void AddUGroup(CharClassBuilder *cc, UGroup *g, int sign, // to what's already missing. Too hard, so do in two steps. CharClassBuilder ccb1; AddUGroup(&ccb1, g, +1, parse_flags); - // If the flags say to take out \n, put it in, so that negating will take it out. - // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags. - bool cutnl = !(parse_flags & Regexp::ClassNL) || - (parse_flags & Regexp::NeverNL); - if (cutnl) { - ccb1.AddRange('\n', '\n'); - } ccb1.Negate(); cc->AddCharClass(&ccb1); return; @@ -2004,13 +1997,8 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, return NULL; break; } - if (ps.flags() & NeverCapture) { - if (!ps.DoLeftParenNoCapture()) - return NULL; - } else { - if (!ps.DoLeftParen(NULL)) - return NULL; - } + if (!ps.DoLeftParen(NULL)) + return NULL; t.remove_prefix(1); // '(' break; diff --git a/third_party/re2/re2/prefilter.cc b/third_party/re2/re2/prefilter.cc index 4b9c35d..30e4c01 100644 --- a/third_party/re2/re2/prefilter.cc +++ b/third_party/re2/re2/prefilter.cc @@ -181,12 +181,6 @@ static Rune ToLowerRune(Rune r) { return ApplyFold(f, r); } -static Rune ToLowerRuneLatin1(Rune r) { - if ('A' <= r && r <= 'Z') - r += 'a' - 'A'; - return r; -} - Prefilter* Prefilter::FromString(const string& str) { Prefilter* m = new Prefilter(Prefilter::ATOM); m->atom_ = str; @@ -211,9 +205,8 @@ class Prefilter::Info { static Info* EmptyString(); static Info* NoMatch(); static Info* AnyChar(); - static Info* CClass(CharClass* cc, bool latin1); + static Info* CClass(CharClass* cc); static Info* Literal(Rune r); - static Info* LiteralLatin1(Rune r); static Info* AnyMatch(); // Format Info as a string. @@ -397,11 +390,6 @@ static string RuneToString(Rune r) { return string(buf, n); } -static string RuneToStringLatin1(Rune r) { - char c = r & 0xff; - return string(&c, 1); -} - // Constructs Info for literal rune. Prefilter::Info* Prefilter::Info::Literal(Rune r) { Info* info = new Info(); @@ -410,14 +398,6 @@ Prefilter::Info* Prefilter::Info::Literal(Rune r) { return info; } -// Constructs Info for literal rune for Latin1 encoded string. -Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { - Info* info = new Info(); - info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); - info->is_exact_ = true; - return info; -} - // Constructs Info for dot (any character). Prefilter::Info* Prefilter::Info::AnyChar() { Prefilter::Info* info = new Prefilter::Info(); @@ -452,8 +432,7 @@ Prefilter::Info* Prefilter::Info::EmptyString() { // Constructs Prefilter::Info for a character class. typedef CharClass::iterator CCIter; -Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, - bool latin1) { +Prefilter::Info* Prefilter::Info::CClass(CharClass *cc) { if (Trace) { VLOG(0) << "CharClassInfo:"; for (CCIter i = cc->begin(); i != cc->end(); ++i) @@ -466,14 +445,8 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, Prefilter::Info *a = new Prefilter::Info(); for (CCIter i = cc->begin(); i != cc->end(); ++i) - for (Rune r = i->lo; r <= i->hi; r++) { - if (latin1) { - a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); - } else { - a->exact_.insert(RuneToString(ToLowerRune(r))); - } - } - + for (Rune r = i->lo; r <= i->hi; r++) + a->exact_.insert(RuneToString(ToLowerRune(r))); a->is_exact_ = true; @@ -486,7 +459,7 @@ Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, class Prefilter::Info::Walker : public Regexp::Walker { public: - Walker(bool latin1) : latin1_(latin1) {} + Walker() {} virtual Info* PostVisit( Regexp* re, Info* parent_arg, @@ -497,9 +470,7 @@ class Prefilter::Info::Walker : public Regexp::Walker { Regexp* re, Info* parent_arg); - bool latin1() { return latin1_; } private: - bool latin1_; DISALLOW_EVIL_CONSTRUCTORS(Walker); }; @@ -507,9 +478,7 @@ Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { if (Trace) { LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); } - - bool latin1 = re->parse_flags() & Regexp::Latin1; - Prefilter::Info::Walker w(latin1); + Prefilter::Info::Walker w; Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); if (w.stopped_early()) { @@ -555,12 +524,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; case kRegexpLiteral: - if (latin1()) { - info = LiteralLatin1(re->rune()); - } - else { - info = Literal(re->rune()); - } + info = Literal(re->rune()); break; case kRegexpLiteralString: @@ -568,17 +532,9 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( info = NoMatch(); break; } - if (latin1()) { - info = LiteralLatin1(re->runes()[0]); - for (int i = 1; i < re->nrunes(); i++) { - info = Concat(info, LiteralLatin1(re->runes()[i])); - } - } else { - info = Literal(re->runes()[0]); - for (int i = 1; i < re->nrunes(); i++) { - info = Concat(info, Literal(re->runes()[i])); - } - } + info = Literal(re->runes()[0]); + for (int i = 1; i < re->nrunes(); i++) + info = Concat(info, Literal(re->runes()[i])); break; case kRegexpConcat: { @@ -629,7 +585,7 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit( break; case kRegexpCharClass: - info = CClass(re->cc(), latin1()); + info = CClass(re->cc()); break; case kRegexpCapture: diff --git a/third_party/re2/re2/re2.cc b/third_party/re2/re2/re2.cc index 78978f1..858b6a48 100644 --- a/third_party/re2/re2/re2.cc +++ b/third_party/re2/re2/re2.cc @@ -39,54 +39,18 @@ const VariadicFunction2 RE2::Consume; const VariadicFunction2 RE2::FindAndConsume; -#define kDefaultMaxMem (8<<20) - -RE2::Options::Options() - : encoding_(EncodingUTF8), - posix_syntax_(false), - longest_match_(false), - log_errors_(true), - max_mem_(kDefaultMaxMem), - literal_(false), - never_nl_(false), - never_capture_(false), - case_sensitive_(true), - perl_classes_(false), - word_boundary_(false), - one_line_(false) { -} - -RE2::Options::Options(RE2::CannedOptions opt) - : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8), - posix_syntax_(opt == RE2::POSIX_SYNTAX), - longest_match_(opt == RE2::POSIX_SYNTAX), - log_errors_(opt != RE2::Quiet), - max_mem_(kDefaultMaxMem), - literal_(false), - never_nl_(false), - never_capture_(false), - case_sensitive_(true), - perl_classes_(false), - word_boundary_(false), - one_line_(false) { -} - -// static empty things for use as const references. -// To avoid global constructors, initialized on demand. -GLOBAL_MUTEX(empty_mutex); -static const string *empty_string; -static const map *empty_named_groups; -static const map *empty_group_names; - -static void InitEmpty() { - GLOBAL_MUTEX_LOCK(empty_mutex); - if (empty_string == NULL) { - empty_string = new string; - empty_named_groups = new map; - empty_group_names = new map; - } - GLOBAL_MUTEX_UNLOCK(empty_mutex); -} +// Commonly-used option sets; arguments to constructor are: +// utf8 input +// posix syntax +// longest match +// log errors +const RE2::Options RE2::DefaultOptions; // EncodingUTF8, false, false, true +const RE2::Options RE2::Latin1(RE2::Options::EncodingLatin1, false, false, true); +//const RE2::Options RE2::POSIX(RE2::Options::EncodingUTF8, true, true, true); +const RE2::Options RE2::Quiet(RE2::Options::EncodingUTF8, false, false, false); + +// If a regular expression has no error, its error_ field points here +static const string empty_string; // Converts from Regexp error code to RE2 error code. // Maybe some day they will diverge. In any event, this @@ -170,9 +134,6 @@ int RE2::Options::ParseFlags() const { if (never_nl()) flags |= Regexp::NeverNL; - if (never_capture()) - flags |= Regexp::NeverCapture; - if (!case_sensitive()) flags |= Regexp::FoldCase; @@ -192,8 +153,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { mutex_ = new Mutex; pattern_ = pattern.as_string(); options_.Copy(options); - InitEmpty(); - error_ = empty_string; + error_ = &empty_string; error_code_ = NoError; suffix_regexp_ = NULL; entire_regexp_ = NULL; @@ -209,7 +169,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { static_cast(options_.ParseFlags()), &status); if (entire_regexp_ == NULL) { - if (error_ == empty_string) + if (error_ == &empty_string) error_ = new string(status.Text()); if (options_.log_errors()) { LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " @@ -251,7 +211,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) { // Returns rprog_, computing it if needed. re2::Prog* RE2::ReverseProg() const { MutexLock l(mutex_); - if (rprog_ == NULL && error_ == empty_string) { + if (rprog_ == NULL && error_ == &empty_string) { rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3); if (rprog_ == NULL) { if (options_.log_errors()) @@ -264,6 +224,9 @@ re2::Prog* RE2::ReverseProg() const { return rprog_; } +static const map empty_named_groups; +static const map empty_group_names; + RE2::~RE2() { if (suffix_regexp_) suffix_regexp_->Decref(); @@ -272,11 +235,11 @@ RE2::~RE2() { delete mutex_; delete prog_; delete rprog_; - if (error_ != empty_string) + if (error_ != &empty_string) delete error_; - if (named_groups_ != NULL && named_groups_ != empty_named_groups) + if (named_groups_ != NULL && named_groups_ != &empty_named_groups) delete named_groups_; - if (group_names_ != NULL && group_names_ != empty_group_names) + if (group_names_ != NULL && group_names_ != &empty_group_names) delete group_names_; } @@ -290,11 +253,11 @@ int RE2::ProgramSize() const { const map& RE2::NamedCapturingGroups() const { MutexLock l(mutex_); if (!ok()) - return *empty_named_groups; + return empty_named_groups; if (named_groups_ == NULL) { named_groups_ = suffix_regexp_->NamedCaptures(); if (named_groups_ == NULL) - named_groups_ = empty_named_groups; + named_groups_ = &empty_named_groups; } return *named_groups_; } @@ -303,11 +266,11 @@ const map& RE2::NamedCapturingGroups() const { const map& RE2::CapturingGroupNames() const { MutexLock l(mutex_); if (!ok()) - return *empty_group_names; + return empty_group_names; if (group_names_ == NULL) { group_names_ = suffix_regexp_->CaptureNames(); if (group_names_ == NULL) - group_names_ = empty_group_names; + group_names_ = &empty_group_names; } return *group_names_; } @@ -348,7 +311,7 @@ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, // Returns the maximum submatch needed for the rewrite to be done by Replace(). // E.g. if rewrite == "foo \\2,\\1", returns 2. -int RE2::MaxSubmatch(const StringPiece& rewrite) { +static int MaxSubmatch(const StringPiece& rewrite) { int max = 0; for (const char *s = rewrite.data(), *end = s + rewrite.size(); s < end; s++) { diff --git a/third_party/re2/re2/re2.h b/third_party/re2/re2/re2.h index 98b06b8..6cc4e8e 100644 --- a/third_party/re2/re2/re2.h +++ b/third_party/re2/re2/re2.h @@ -5,6 +5,8 @@ #ifndef RE2_RE2_H #define RE2_RE2_H +#define kDefaultMaxMem (8<<20) + // C++ interface to the re2 regular-expression library. // RE2 supports Perl-style regular expressions (with extensions like // \d, \w, \s, ...). @@ -187,28 +189,12 @@ #include "re2/variadic_function.h" namespace re2 { - using std::string; using std::map; class Mutex; class Prog; class Regexp; -// The following enum should be used only as a constructor argument to indicate -// that the variable has static storage class, and that the constructor should -// do nothing to its state. It indicates to the reader that it is legal to -// declare a static instance of the class, provided the constructor is given -// the LINKER_INITIALIZED argument. Normally, it is unsafe to declare a -// static variable that has a constructor or a destructor because invocation -// order is undefined. However, IF the type can be initialized by filling with -// zeroes (which the loader does for static variables), AND the type's -// destructor does nothing to the storage, then a constructor for static -// initialization can be declared as -// explicit MyClass(LinkerInitialized x) {} -// and invoked as -// static MyClass my_variable_name(LINKER_INITIALIZED); -enum LinkerInitialized { LINKER_INITIALIZED }; - // Interface for regular expression matching. Also corresponds to a // pre-compiled regular expression. An "RE2" object is safe for // concurrent use by multiple threads. @@ -245,15 +231,12 @@ class RE2 { // Predefined common options. // If you need more complicated things, instantiate - // an Option class, possibly passing one of these to - // the Option constructor, change the settings, and pass that - // Option class to the RE2 constructor. - enum CannedOptions { - DefaultOptions = 0, - Latin1, // treat input as Latin-1 (default UTF-8) - POSIX_SYNTAX, // POSIX syntax, leftmost-longest match - Quiet // do not log about regexp parse errors - }; + // an Option class, change the settings, and pass it to the + // RE2 constructor. + static const Options DefaultOptions; + static const Options Latin1; // treat input as Latin-1 (default UTF-8) + //static const Options POSIX; // POSIX syntax, leftmost-longest match + static const Options Quiet; // do not log about regexp parse errors // Need to have the const char* and const string& forms for implicit // conversions when passing string literals to FullMatch and PartialMatch. @@ -486,20 +469,6 @@ class RE2 { // fail because of a bad rewrite string. bool CheckRewriteString(const StringPiece& rewrite, string* error) const; - // Returns the maximum submatch needed for the rewrite to be done by - // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. - static int MaxSubmatch(const StringPiece& rewrite); - - // Append the "rewrite" string, with backslash subsitutions from "vec", - // to string "out". - // Returns true on success. This method can fail because of a malformed - // rewrite string. CheckRewriteString guarantees that the rewrite will - // be sucessful. - bool Rewrite(string *out, - const StringPiece &rewrite, - const StringPiece* vec, - int veclen) const; - // Constructor options class Options { public: @@ -512,7 +481,6 @@ class RE2 { // max_mem (see below) approx. max memory footprint of RE2 // literal (false) interpret string as literal, not regexp // never_nl (false) never match \n, even if it is in regexp - // never_capture (false) parse all parens as non-capturing // case_sensitive (true) match is case-sensitive (regexp can override // with (?i) unless in posix_syntax mode) // @@ -551,18 +519,24 @@ class RE2 { // Once a DFA fills its budget, it flushes its cache and starts over. // If this happens too often, RE2 falls back on the NFA implementation. - // For now, make the default budget something close to Code Search. -#ifndef WIN32 - static const int kDefaultMaxMem = 8<<20; -#endif - enum Encoding { EncodingUTF8 = 1, EncodingLatin1 }; - Options(); - /*implicit*/ Options(CannedOptions); + Options() : + encoding_(EncodingUTF8), + posix_syntax_(false), + longest_match_(false), + log_errors_(true), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { + } Encoding encoding() const { return encoding_; } void set_encoding(Encoding encoding) { encoding_ = encoding; } @@ -596,9 +570,6 @@ class RE2 { bool never_nl() const { return never_nl_; } void set_never_nl(bool b) { never_nl_ = b; } - bool never_capture() const { return never_capture_; } - void set_never_capture(bool b) { never_capture_ = b; } - bool case_sensitive() const { return case_sensitive_; } void set_case_sensitive(bool b) { case_sensitive_ = b; } @@ -619,7 +590,6 @@ class RE2 { max_mem_ = src.max_mem_; literal_ = src.literal_; never_nl_ = src.never_nl_; - never_capture_ = src.never_capture_; case_sensitive_ = src.case_sensitive_; perl_classes_ = src.perl_classes_; word_boundary_ = src.word_boundary_; @@ -629,6 +599,25 @@ class RE2 { int ParseFlags() const; private: + // Private constructor for defining constants like RE2::Latin1. + friend class RE2; + Options(Encoding encoding, + bool posix_syntax, + bool longest_match, + bool log_errors) : + encoding_(encoding), + posix_syntax_(posix_syntax), + longest_match_(longest_match), + log_errors_(log_errors), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { + } + Encoding encoding_; bool posix_syntax_; bool longest_match_; @@ -636,7 +625,6 @@ class RE2 { int64_t max_mem_; bool literal_; bool never_nl_; - bool never_capture_; bool case_sensitive_; bool perl_classes_; bool word_boundary_; @@ -681,6 +669,11 @@ class RE2 { private: void Init(const StringPiece& pattern, const Options& options); + bool Rewrite(string *out, + const StringPiece &rewrite, + const StringPiece* vec, + int veclen) const; + bool DoMatch(const StringPiece& text, Anchor anchor, int* consumed, diff --git a/third_party/re2/re2/regexp.cc b/third_party/re2/re2/regexp.cc index a74ceec..9486b3c 100644 --- a/third_party/re2/re2/regexp.cc +++ b/third_party/re2/re2/regexp.cc @@ -59,39 +59,29 @@ bool Regexp::QuickDestroy() { return false; } -static map *ref_map; -GLOBAL_MUTEX(ref_mutex); +static map ref_map; +static Mutex ref_mutex; int Regexp::Ref() { if (ref_ < kMaxRef) return ref_; - GLOBAL_MUTEX_LOCK(ref_mutex); - int r = 0; - if (ref_map != NULL) { - r = (*ref_map)[this]; - } - GLOBAL_MUTEX_UNLOCK(ref_mutex); - return r; + MutexLock l(&ref_mutex); + return ref_map[this]; } // Increments reference count, returns object as convenience. Regexp* Regexp::Incref() { if (ref_ >= kMaxRef-1) { // Store ref count in overflow map. - GLOBAL_MUTEX_LOCK(ref_mutex); - if (ref_map == NULL) { - ref_map = new map; - } - if (ref_ == kMaxRef) { - // already overflowed - (*ref_map)[this]++; - } else { - // overflowing now - (*ref_map)[this] = kMaxRef; - ref_ = kMaxRef; + MutexLock l(&ref_mutex); + if (ref_ == kMaxRef) { // already overflowed + ref_map[this]++; + return this; } - GLOBAL_MUTEX_UNLOCK(ref_mutex); + // overflowing now + ref_map[this] = kMaxRef; + ref_ = kMaxRef; return this; } @@ -103,15 +93,14 @@ Regexp* Regexp::Incref() { void Regexp::Decref() { if (ref_ == kMaxRef) { // Ref count is stored in overflow map. - GLOBAL_MUTEX_LOCK(ref_mutex); - int r = (*ref_map)[this] - 1; + MutexLock l(&ref_mutex); + int r = ref_map[this] - 1; if (r < kMaxRef) { ref_ = r; - ref_map->erase(this); + ref_map.erase(this); } else { - (*ref_map)[this] = r; + ref_map[this] = r; } - GLOBAL_MUTEX_UNLOCK(ref_mutex); return; } ref_--; @@ -458,7 +447,7 @@ bool Regexp::Equal(Regexp* a, Regexp* b) { } // Keep in sync with enum RegexpStatusCode in regexp.h -static const char *kErrorStrings[] = { +static const string kErrorStrings[] = { "no error", "unexpected error", "invalid escape sequence", @@ -475,7 +464,7 @@ static const char *kErrorStrings[] = { "invalid named capture group", }; -string RegexpStatus::CodeText(enum RegexpStatusCode code) { +const string& RegexpStatus::CodeText(enum RegexpStatusCode code) { if (code < 0 || code >= arraysize(kErrorStrings)) code = kRegexpInternalError; return kErrorStrings[code]; diff --git a/third_party/re2/re2/regexp.h b/third_party/re2/re2/regexp.h index 331c017..1aebc16 100644 --- a/third_party/re2/re2/regexp.h +++ b/third_party/re2/re2/regexp.h @@ -197,7 +197,7 @@ class RegexpStatus { // Returns text equivalent of code, e.g.: // "Bad character class" - static string CodeText(enum RegexpStatusCode code); + static const string& CodeText(enum RegexpStatusCode code); // Returns text describing error, e.g.: // "Bad character class: [z-a]" @@ -299,7 +299,6 @@ class Regexp { // and \P{Han} for its negation. NeverNL = 1<<11, // Never match NL, even if the regexp mentions // it explicitly. - NeverCapture = 1<<12, // Parse all parens as non-capturing. // As close to Perl as we can get. LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX | diff --git a/third_party/re2/re2/testing/filtered_re2_test.cc b/third_party/re2/re2/testing/filtered_re2_test.cc index e3a0dd1..7755d30 100644 --- a/third_party/re2/re2/testing/filtered_re2_test.cc +++ b/third_party/re2/re2/testing/filtered_re2_test.cc @@ -39,23 +39,6 @@ TEST(FilteredRE2Test, SmallOrTest) { EXPECT_EQ(id, v.matches[0]); } -TEST(FilteredRE2Test, SmallLatinTest) { - FLAGS_filtered_re2_min_atom_len = 3; - FilterTestVars v; - int id; - - v.opts.set_utf8(false); - v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id); - v.f.Compile(&v.atoms); - EXPECT_EQ(1, v.atoms.size()); - EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef"); - - v.atom_indices.push_back(0); - v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches); - EXPECT_EQ(1, v.matches.size()); - EXPECT_EQ(id, v.matches[0]); -} - struct AtomTest { const char* testname; // If any test needs more than this many regexps or atoms, increase diff --git a/third_party/re2/re2/testing/parse_test.cc b/third_party/re2/re2/testing/parse_test.cc index f67b477..f895316 100644 --- a/third_party/re2/re2/testing/parse_test.cc +++ b/third_party/re2/re2/testing/parse_test.cc @@ -11,19 +11,11 @@ namespace re2 { -static const Regexp::ParseFlags TestZeroFlags = Regexp::ParseFlags(1<<30); - struct Test { const char* regexp; const char* parse; - Regexp::ParseFlags flags; }; -static Regexp::ParseFlags kTestFlags = Regexp::MatchNL | - Regexp::PerlX | - Regexp::PerlClasses | - Regexp::UnicodeGroups; - static Test tests[] = { // Base cases { "a", "lit{a}" }, @@ -146,54 +138,13 @@ static Test tests[] = { // Strings { "abcde", "str{abcde}" }, { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, - - // Reported bug involving \n leaking in despite use of NeverNL. - { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, - { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, - { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags }, - { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, - { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags }, - { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, - { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags }, - { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, - { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags }, - { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase }, - { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL }, - { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase }, - { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses }, - { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::FoldCase }, - { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::NeverNL }, - { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, - { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses }, - { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::FoldCase }, - { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::NeverNL }, - { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", - Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase }, }; +static Regexp::ParseFlags kTestFlags = Regexp::MatchNL | + Regexp::PerlX | + Regexp::PerlClasses | + Regexp::UnicodeGroups; + bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) { return Regexp::Equal(a, b); } @@ -203,16 +154,12 @@ void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags, Regexp** re = new Regexp*[ntests]; for (int i = 0; i < ntests; i++) { RegexpStatus status; - Regexp::ParseFlags f = flags; - if (tests[i].flags != 0) { - f = tests[i].flags & ~TestZeroFlags; - } - re[i] = Regexp::Parse(tests[i].regexp, f, &status); + re[i] = Regexp::Parse(tests[i].regexp, flags, &status); CHECK(re[i] != NULL) << " " << tests[i].regexp << " " << status.Text(); string s = re[i]->Dump(); EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp - << "\nparse: " << tests[i].parse << " s: " << s << " flag=" << f; + << "\nparse: " << tests[i].parse << " s: " << s; } for (int i = 0; i < ntests; i++) { @@ -381,14 +328,10 @@ TEST(TestParse, InvalidRegexps) { TEST(TestToString, EquivalentParse) { for (int i = 0; i < arraysize(tests); i++) { RegexpStatus status; - Regexp::ParseFlags f = kTestFlags; - if (tests[i].flags != 0) { - f = tests[i].flags & ~TestZeroFlags; - } - Regexp* re = Regexp::Parse(tests[i].regexp, f, &status); + Regexp* re = Regexp::Parse(tests[i].regexp, kTestFlags, &status); CHECK(re != NULL) << " " << tests[i].regexp << " " << status.Text(); string s = re->Dump(); - EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << string(tests[i].parse) << " " << s; + EXPECT_EQ(string(tests[i].parse), s); string t = re->ToString(); if (t != tests[i].regexp) { // If ToString didn't return the original regexp, diff --git a/third_party/re2/re2/testing/re2_arg_test.cc b/third_party/re2/re2/testing/re2_arg_test.cc index ae7a7b0..0a77d95 100644 --- a/third_party/re2/re2/testing/re2_arg_test.cc +++ b/third_party/re2/re2/testing/re2_arg_test.cc @@ -57,8 +57,7 @@ const SuccessTable kSuccessTable[] = { // -2^15-1 to -2^31 { "-32769", -32769, { false, false, true, false, true, false }}, { "-2147483648", - static_cast(0xFFFFFFFF80000000LL), -{ false, false, true, false, true, false }}, + 0xFFFFFFFF80000000LL, { false, false, true, false, true, false }}, // 2^31 to 2^32-1 { "2147483648", 2147483648U, { false, false, false, true, true, true }}, @@ -71,14 +70,14 @@ const SuccessTable kSuccessTable[] = { // -2^31-1 to -2^63 { "-2147483649", -2147483649LL, { false, false, false, false, true, false }}, -{ "-9223372036854775808", static_cast(0x8000000000000000LL), - { false, false, false, false, true, false }}, +{ "-9223372036854775808", + 0x8000000000000000LL, { false, false, false, false, true, false }}, // 2^63 to 2^64-1 -{ "9223372036854775808", static_cast(9223372036854775808ULL), - { false, false, false, false, false, true }}, -{ "18446744073709551615", static_cast(18446744073709551615ULL), - { false, false, false, false, false, true }}, +{ "9223372036854775808", + 9223372036854775808ULL, { false, false, false, false, false, true }}, +{ "18446744073709551615", + 18446744073709551615ULL, { false, false, false, false, false, true }}, // >= 2^64 { "18446744073709551616", 0, { false, false, false, false, false, false }}, diff --git a/third_party/re2/re2/testing/re2_test.cc b/third_party/re2/re2/testing/re2_test.cc index 911e868..ef5d4aa 100644 --- a/third_party/re2/re2/testing/re2_test.cc +++ b/third_party/re2/re2/testing/re2_test.cc @@ -757,18 +757,18 @@ TEST(RE2, FullMatchTypeTests) { CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); - snprintf(buf, sizeof(buf), "%lld", (long long int)max); + snprintf(buf, sizeof(buf), "%lld", max); CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); - snprintf(buf, sizeof(buf), "%lld", (long long int)min); + snprintf(buf, sizeof(buf), "%lld", min); CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min); - snprintf(buf, sizeof(buf), "%lld", (long long int)max); + snprintf(buf, sizeof(buf), "%lld", max); assert(buf[strlen(buf)-1] != '9'); buf[strlen(buf)-1]++; CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); - snprintf(buf, sizeof(buf), "%lld", (long long int)min); + snprintf(buf, sizeof(buf), "%lld", min); assert(buf[strlen(buf)-1] != '9'); buf[strlen(buf)-1]++; CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); @@ -782,7 +782,7 @@ TEST(RE2, FullMatchTypeTests) { CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100); - snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); + snprintf(buf, sizeof(buf), "%llu", max); CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); assert(buf[strlen(buf)-1] != '9'); @@ -1253,14 +1253,6 @@ TEST(RE2, NeverNewline) { } } -// Check that there are no capturing groups in "never capture" mode. -TEST(RE2, NeverCapture) { - RE2::Options opt; - opt.set_never_capture(true); - RE2 re("(r)(e)", opt); - EXPECT_EQ(0, re.NumberOfCapturingGroups()); -} - // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. // Triggered by a failed DFA search falling back to Bitstate when // using Match with a NULL submatch set. Bitstate tried to read diff --git a/third_party/re2/re2/testing/set_test.cc b/third_party/re2/re2/testing/set_test.cc index 74058a4..89aed80 100644 --- a/third_party/re2/re2/testing/set_test.cc +++ b/third_party/re2/re2/testing/set_test.cc @@ -69,18 +69,6 @@ TEST(Set, UnanchoredFactored) { CHECK_EQ(v.size(), 0); } -TEST(Set, UnanchoredDollar) { - RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED); - - CHECK_EQ(s.Add("foo$", NULL), 0); - CHECK_EQ(s.Compile(), true); - - vector v; - CHECK_EQ(s.Match("foo", &v), true); - CHECK_EQ(v.size(), 1); - CHECK_EQ(v[0], 0); -} - TEST(Set, Anchored) { RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH); diff --git a/third_party/re2/util/logging.h b/third_party/re2/util/logging.h index 7302ea6..53f7198 100644 --- a/third_party/re2/util/logging.h +++ b/third_party/re2/util/logging.h @@ -53,24 +53,17 @@ class LogMessage { public: - LogMessage(const char* file, int line) : flushed_(false) { + LogMessage(const char* file, int line) { stream() << file << ":" << line << ": "; } - void Flush() { + ~LogMessage() { stream() << "\n"; string s = str_.str(); if(write(2, s.data(), s.size()) < 0) {} // shut up gcc - flushed_ = true; - } - ~LogMessage() { - if (!flushed_) { - Flush(); - } } ostream& stream() { return str_; } private: - bool flushed_; std::ostringstream str_; DISALLOW_EVIL_CONSTRUCTORS(LogMessage); }; @@ -80,7 +73,7 @@ class LogMessageFatal : public LogMessage { LogMessageFatal(const char* file, int line) : LogMessage(file, line) { } ~LogMessageFatal() { - Flush(); + std::cerr << "\n"; abort(); } private: diff --git a/third_party/re2/util/mutex.h b/third_party/re2/util/mutex.h index e321fae..4bb6fec 100644 --- a/third_party/re2/util/mutex.h +++ b/third_party/re2/util/mutex.h @@ -76,7 +76,7 @@ class Mutex { MutexType mutex_; // Catch the error of writing Mutex when intending MutexLock. - Mutex(Mutex *ignored); + Mutex(Mutex *ignored) {} // Disallow "evil" constructors Mutex(const Mutex&); void operator=(const Mutex&); @@ -189,27 +189,6 @@ class WriterMutexLock { #define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) #define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) -// Provide safe way to declare and use global, linker-initialized mutex. Sigh. -#ifdef HAVE_PTHREAD - -#define GLOBAL_MUTEX(name) \ - static pthread_mutex_t (name) = PTHREAD_MUTEX_INITIALIZER -#define GLOBAL_MUTEX_LOCK(name) \ - pthread_mutex_lock(&(name)) -#define GLOBAL_MUTEX_UNLOCK(name) \ - pthread_mutex_unlock(&(name)) - -#else - -#define GLOBAL_MUTEX(name) \ - static Mutex name -#define GLOBAL_MUTEX_LOCK(name) \ - name.Lock() -#define GLOBAL_MUTEX_UNLOCK(name) \ - name.Unlock() - -#endif - } // namespace re2 #endif /* #define RE2_UTIL_MUTEX_H_ */ diff --git a/third_party/re2/util/sparse_array.h b/third_party/re2/util/sparse_array.h index 3e33f89..c024bed 100644 --- a/third_party/re2/util/sparse_array.h +++ b/third_party/re2/util/sparse_array.h @@ -224,14 +224,13 @@ class SparseArray { int max_size_; int* sparse_to_dense_; vector dense_; - bool valgrind_; DISALLOW_EVIL_CONSTRUCTORS(SparseArray); }; template SparseArray::SparseArray() - : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(), valgrind_(RunningOnValgrind()) {} + : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_() {} // IndexValue pairs: exposed in SparseArray::iterator. template @@ -273,7 +272,7 @@ void SparseArray::resize(int new_max_size) { if (sparse_to_dense_) { memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); // Don't need to zero the memory but appease Valgrind. - if (valgrind_) { + if (RunningOnValgrind()) { for (int i = max_size_; i < new_max_size; i++) a[i] = 0xababababU; } @@ -418,10 +417,9 @@ void SparseArray::create_index(int i) { template SparseArray::SparseArray(int max_size) { max_size_ = max_size; sparse_to_dense_ = new int[max_size]; - valgrind_ = RunningOnValgrind(); dense_.resize(max_size); // Don't need to zero the new memory, but appease Valgrind. - if (valgrind_) { + if (RunningOnValgrind()) { for (int i = 0; i < max_size; i++) { sparse_to_dense_[i] = 0xababababU; dense_[i].index_ = 0xababababU; diff --git a/third_party/re2/util/sparse_set.h b/third_party/re2/util/sparse_set.h index 165dd09..9cb5753 100644 --- a/third_party/re2/util/sparse_set.h +++ b/third_party/re2/util/sparse_set.h @@ -54,16 +54,15 @@ namespace re2 { class SparseSet { public: SparseSet() - : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), valgrind_(RunningOnValgrind()) {} + : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL) {} SparseSet(int max_size) { max_size_ = max_size; sparse_to_dense_ = new int[max_size]; dense_ = new int[max_size]; - valgrind_ = RunningOnValgrind(); // Don't need to zero the memory, but do so anyway // to appease Valgrind. - if (valgrind_) { + if (RunningOnValgrind()) { for (int i = 0; i < max_size; i++) { dense_[i] = 0xababababU; sparse_to_dense_[i] = 0xababababU; @@ -95,7 +94,7 @@ class SparseSet { int* a = new int[new_max_size]; if (sparse_to_dense_) { memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); - if (valgrind_) { + if (RunningOnValgrind()) { for (int i = max_size_; i < new_max_size; i++) a[i] = 0xababababU; } @@ -106,7 +105,7 @@ class SparseSet { a = new int[new_max_size]; if (dense_) { memmove(a, dense_, size_*sizeof a[0]); - if (valgrind_) { + if (RunningOnValgrind()) { for (int i = size_; i < new_max_size; i++) a[i] = 0xababababU; } @@ -169,7 +168,6 @@ class SparseSet { int max_size_; int* sparse_to_dense_; int* dense_; - bool valgrind_; DISALLOW_EVIL_CONSTRUCTORS(SparseSet); }; diff --git a/third_party/re2/util/util.h b/third_party/re2/util/util.h index 11b5f4a..0a15a81 100644 --- a/third_party/re2/util/util.h +++ b/third_party/re2/util/util.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/third_party/re2/util/valgrind.cc b/third_party/re2/util/valgrind.cc index 46f804b..749bb59 100644 --- a/third_party/re2/util/valgrind.cc +++ b/third_party/re2/util/valgrind.cc @@ -7,12 +7,18 @@ namespace re2 { -int RunningOnValgrind() { +static bool checkValgrind() { #ifdef RUNNING_ON_VALGRIND return RUNNING_ON_VALGRIND; #else - return 0; + return false; #endif } +static const int valgrind = checkValgrind(); + +int RunningOnValgrind() { + return valgrind; +} + } // namespace re2 diff --git a/third_party/re2/util/valgrind.h b/third_party/re2/util/valgrind.h index d097b0c..3cfd6fe 100644 --- a/third_party/re2/util/valgrind.h +++ b/third_party/re2/util/valgrind.h @@ -4063,7 +4063,6 @@ typedef #endif /* PLAT_ppc64_aix5 */ - #ifndef WIN32 /* ------------------------------------------------------------------ */ /* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ -- cgit v1.1