summaryrefslogtreecommitdiffstats
path: root/third_party
diff options
context:
space:
mode:
authoresprehn@chromium.org <esprehn@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538>2013-04-24 19:14:53 +0000
committeresprehn@chromium.org <esprehn@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538>2013-04-24 19:14:53 +0000
commitba5b5026c3b0abfd64e5e2c06c113c18275d4c65 (patch)
tree50d61b75eee4ebede021331bbf562d8a5c1c9364 /third_party
parentf16ae9634ffbe5294c5fb558b6ab056ed38f624c (diff)
downloadchromium_src-ba5b5026c3b0abfd64e5e2c06c113c18275d4c65.zip
chromium_src-ba5b5026c3b0abfd64e5e2c06c113c18275d4c65.tar.gz
chromium_src-ba5b5026c3b0abfd64e5e2c06c113c18275d4c65.tar.bz2
Remove YARR, we don't use it anymore
Now that RegularExpression uses V8 (r148958) nothing uses YARR and we can remove it. Review URL: https://codereview.chromium.org/14051006 git-svn-id: svn://svn.chromium.org/blink/trunk@149026 bbb929c8-8fbe-4397-9dbb-9b2b20218538
Diffstat (limited to 'third_party')
-rw-r--r--third_party/WebKit/Source/bindings/bindings.gyp1
-rw-r--r--third_party/WebKit/Source/core/core.gyp/core.gyp3
-rw-r--r--third_party/WebKit/Source/yarr/Yarr.h69
-rw-r--r--third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.cpp463
-rw-r--r--third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.h138
-rw-r--r--third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.js219
-rw-r--r--third_party/WebKit/Source/yarr/YarrInterpreter.cpp1959
-rw-r--r--third_party/WebKit/Source/yarr/YarrInterpreter.h380
-rw-r--r--third_party/WebKit/Source/yarr/YarrParser.h880
-rw-r--r--third_party/WebKit/Source/yarr/YarrPattern.cpp880
-rw-r--r--third_party/WebKit/Source/yarr/YarrPattern.h410
-rw-r--r--third_party/WebKit/Source/yarr/YarrSyntaxChecker.cpp59
-rw-r--r--third_party/WebKit/Source/yarr/YarrSyntaxChecker.h38
-rw-r--r--third_party/WebKit/Source/yarr/create_regex_tables121
-rw-r--r--third_party/WebKit/Source/yarr/yarr.gyp100
15 files changed, 0 insertions, 5720 deletions
diff --git a/third_party/WebKit/Source/bindings/bindings.gyp b/third_party/WebKit/Source/bindings/bindings.gyp
index 1f56f42..da85411 100644
--- a/third_party/WebKit/Source/bindings/bindings.gyp
+++ b/third_party/WebKit/Source/bindings/bindings.gyp
@@ -45,7 +45,6 @@
'dependencies': [
'../config.gyp:config',
'../wtf/wtf.gyp:wtf',
- '../yarr/yarr.gyp:yarr',
'../core/core.gyp/core.gyp:webcore',
'<(DEPTH)/build/temp_gyp/googleurl.gyp:googleurl',
'<(DEPTH)/skia/skia.gyp:skia',
diff --git a/third_party/WebKit/Source/core/core.gyp/core.gyp b/third_party/WebKit/Source/core/core.gyp/core.gyp
index 41129d6..0e3c318 100644
--- a/third_party/WebKit/Source/core/core.gyp/core.gyp
+++ b/third_party/WebKit/Source/core/core.gyp/core.gyp
@@ -352,7 +352,6 @@
'injected_canvas_script_source',
'injected_script_source',
'debugger_script_source',
- '../../yarr/yarr.gyp:yarr',
'../../wtf/wtf.gyp:wtf',
'<(DEPTH)/build/temp_gyp/googleurl.gyp:googleurl',
'<(DEPTH)/skia/skia.gyp:skia',
@@ -484,7 +483,6 @@
'core_derived_sources.gyp:make_derived_sources',
'../../bindings/derived_sources.gyp:bindings_derived_sources',
'../../Platform/Platform.gyp/Platform.gyp:webkit_platform',
- '../../yarr/yarr.gyp:yarr',
'../../wtf/wtf.gyp:wtf',
'../../config.gyp:config',
'<(DEPTH)/build/temp_gyp/googleurl.gyp:googleurl',
@@ -507,7 +505,6 @@
],
'export_dependent_settings': [
'../../Platform/Platform.gyp/Platform.gyp:webkit_platform',
- '../../yarr/yarr.gyp:yarr',
'../../wtf/wtf.gyp:wtf',
'../../config.gyp:config',
'<(DEPTH)/build/temp_gyp/googleurl.gyp:googleurl',
diff --git a/third_party/WebKit/Source/yarr/Yarr.h b/third_party/WebKit/Source/yarr/Yarr.h
deleted file mode 100644
index ffbccb1..0000000
--- a/third_party/WebKit/Source/yarr/Yarr.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef Yarr_h
-#define Yarr_h
-
-#include "yarr/YarrInterpreter.h"
-#include "yarr/YarrPattern.h"
-
-namespace JSC { namespace Yarr {
-
-#define YarrStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
-#define YarrStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
-#define YarrStackSpaceForBackTrackInfoBackReference 2
-#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
-#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1
-#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
-#define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1
-#define YarrStackSpaceForBackTrackInfoParentheses 2
-
-static const unsigned quantifyInfinite = UINT_MAX;
-static const unsigned offsetNoMatch = (unsigned)-1;
-
-// The below limit restricts the number of "recursive" match calls in order to
-// avoid spending exponential time on complex regular expressions.
-static const unsigned matchLimit = 1000000;
-
-enum JSRegExpResult {
- JSRegExpMatch = 1,
- JSRegExpNoMatch = 0,
- JSRegExpErrorNoMatch = -1,
- JSRegExpErrorHitLimit = -2,
- JSRegExpErrorNoMemory = -3,
- JSRegExpErrorInternal = -4
-};
-
-enum YarrCharSize {
- Char8,
- Char16
-};
-
-} } // namespace JSC::Yarr
-
-#endif // Yarr_h
-
diff --git a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.cpp b/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.cpp
deleted file mode 100644
index 7bb3d08..0000000
--- a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.cpp
+++ /dev/null
@@ -1,463 +0,0 @@
-/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js
-
-#include "config.h"
-#include "YarrCanonicalizeUCS2.h"
-
-namespace JSC { namespace Yarr {
-
-#include <stdint.h>
-
-uint16_t ucs2CharacterSet0[] = { 0x01c4u, 0x01c5u, 0x01c6u, 0 };
-uint16_t ucs2CharacterSet1[] = { 0x01c7u, 0x01c8u, 0x01c9u, 0 };
-uint16_t ucs2CharacterSet2[] = { 0x01cau, 0x01cbu, 0x01ccu, 0 };
-uint16_t ucs2CharacterSet3[] = { 0x01f1u, 0x01f2u, 0x01f3u, 0 };
-uint16_t ucs2CharacterSet4[] = { 0x0392u, 0x03b2u, 0x03d0u, 0 };
-uint16_t ucs2CharacterSet5[] = { 0x0395u, 0x03b5u, 0x03f5u, 0 };
-uint16_t ucs2CharacterSet6[] = { 0x0398u, 0x03b8u, 0x03d1u, 0 };
-uint16_t ucs2CharacterSet7[] = { 0x0345u, 0x0399u, 0x03b9u, 0x1fbeu, 0 };
-uint16_t ucs2CharacterSet8[] = { 0x039au, 0x03bau, 0x03f0u, 0 };
-uint16_t ucs2CharacterSet9[] = { 0x00b5u, 0x039cu, 0x03bcu, 0 };
-uint16_t ucs2CharacterSet10[] = { 0x03a0u, 0x03c0u, 0x03d6u, 0 };
-uint16_t ucs2CharacterSet11[] = { 0x03a1u, 0x03c1u, 0x03f1u, 0 };
-uint16_t ucs2CharacterSet12[] = { 0x03a3u, 0x03c2u, 0x03c3u, 0 };
-uint16_t ucs2CharacterSet13[] = { 0x03a6u, 0x03c6u, 0x03d5u, 0 };
-uint16_t ucs2CharacterSet14[] = { 0x1e60u, 0x1e61u, 0x1e9bu, 0 };
-
-static const size_t UCS2_CANONICALIZATION_SETS = 15;
-uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {
- ucs2CharacterSet0,
- ucs2CharacterSet1,
- ucs2CharacterSet2,
- ucs2CharacterSet3,
- ucs2CharacterSet4,
- ucs2CharacterSet5,
- ucs2CharacterSet6,
- ucs2CharacterSet7,
- ucs2CharacterSet8,
- ucs2CharacterSet9,
- ucs2CharacterSet10,
- ucs2CharacterSet11,
- ucs2CharacterSet12,
- ucs2CharacterSet13,
- ucs2CharacterSet14,
-};
-
-const size_t UCS2_CANONICALIZATION_RANGES = 364;
-UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {
- { 0x0000u, 0x0040u, 0x0000u, CanonicalizeUnique },
- { 0x0041u, 0x005au, 0x0020u, CanonicalizeRangeLo },
- { 0x005bu, 0x0060u, 0x0000u, CanonicalizeUnique },
- { 0x0061u, 0x007au, 0x0020u, CanonicalizeRangeHi },
- { 0x007bu, 0x00b4u, 0x0000u, CanonicalizeUnique },
- { 0x00b5u, 0x00b5u, 0x0009u, CanonicalizeSet },
- { 0x00b6u, 0x00bfu, 0x0000u, CanonicalizeUnique },
- { 0x00c0u, 0x00d6u, 0x0020u, CanonicalizeRangeLo },
- { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeUnique },
- { 0x00d8u, 0x00deu, 0x0020u, CanonicalizeRangeLo },
- { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeUnique },
- { 0x00e0u, 0x00f6u, 0x0020u, CanonicalizeRangeHi },
- { 0x00f7u, 0x00f7u, 0x0000u, CanonicalizeUnique },
- { 0x00f8u, 0x00feu, 0x0020u, CanonicalizeRangeHi },
- { 0x00ffu, 0x00ffu, 0x0079u, CanonicalizeRangeLo },
- { 0x0100u, 0x012fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0130u, 0x0131u, 0x0000u, CanonicalizeUnique },
- { 0x0132u, 0x0137u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0138u, 0x0138u, 0x0000u, CanonicalizeUnique },
- { 0x0139u, 0x0148u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x0149u, 0x0149u, 0x0000u, CanonicalizeUnique },
- { 0x014au, 0x0177u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0178u, 0x0178u, 0x0079u, CanonicalizeRangeHi },
- { 0x0179u, 0x017eu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x017fu, 0x017fu, 0x0000u, CanonicalizeUnique },
- { 0x0180u, 0x0180u, 0x00c3u, CanonicalizeRangeLo },
- { 0x0181u, 0x0181u, 0x00d2u, CanonicalizeRangeLo },
- { 0x0182u, 0x0185u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0186u, 0x0186u, 0x00ceu, CanonicalizeRangeLo },
- { 0x0187u, 0x0188u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x0189u, 0x018au, 0x00cdu, CanonicalizeRangeLo },
- { 0x018bu, 0x018cu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x018du, 0x018du, 0x0000u, CanonicalizeUnique },
- { 0x018eu, 0x018eu, 0x004fu, CanonicalizeRangeLo },
- { 0x018fu, 0x018fu, 0x00cau, CanonicalizeRangeLo },
- { 0x0190u, 0x0190u, 0x00cbu, CanonicalizeRangeLo },
- { 0x0191u, 0x0192u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x0193u, 0x0193u, 0x00cdu, CanonicalizeRangeLo },
- { 0x0194u, 0x0194u, 0x00cfu, CanonicalizeRangeLo },
- { 0x0195u, 0x0195u, 0x0061u, CanonicalizeRangeLo },
- { 0x0196u, 0x0196u, 0x00d3u, CanonicalizeRangeLo },
- { 0x0197u, 0x0197u, 0x00d1u, CanonicalizeRangeLo },
- { 0x0198u, 0x0199u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x019au, 0x019au, 0x00a3u, CanonicalizeRangeLo },
- { 0x019bu, 0x019bu, 0x0000u, CanonicalizeUnique },
- { 0x019cu, 0x019cu, 0x00d3u, CanonicalizeRangeLo },
- { 0x019du, 0x019du, 0x00d5u, CanonicalizeRangeLo },
- { 0x019eu, 0x019eu, 0x0082u, CanonicalizeRangeLo },
- { 0x019fu, 0x019fu, 0x00d6u, CanonicalizeRangeLo },
- { 0x01a0u, 0x01a5u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01a6u, 0x01a6u, 0x00dau, CanonicalizeRangeLo },
- { 0x01a7u, 0x01a8u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x01a9u, 0x01a9u, 0x00dau, CanonicalizeRangeLo },
- { 0x01aau, 0x01abu, 0x0000u, CanonicalizeUnique },
- { 0x01acu, 0x01adu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01aeu, 0x01aeu, 0x00dau, CanonicalizeRangeLo },
- { 0x01afu, 0x01b0u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x01b1u, 0x01b2u, 0x00d9u, CanonicalizeRangeLo },
- { 0x01b3u, 0x01b6u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x01b7u, 0x01b7u, 0x00dbu, CanonicalizeRangeLo },
- { 0x01b8u, 0x01b9u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01bau, 0x01bbu, 0x0000u, CanonicalizeUnique },
- { 0x01bcu, 0x01bdu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01beu, 0x01beu, 0x0000u, CanonicalizeUnique },
- { 0x01bfu, 0x01bfu, 0x0038u, CanonicalizeRangeLo },
- { 0x01c0u, 0x01c3u, 0x0000u, CanonicalizeUnique },
- { 0x01c4u, 0x01c6u, 0x0000u, CanonicalizeSet },
- { 0x01c7u, 0x01c9u, 0x0001u, CanonicalizeSet },
- { 0x01cau, 0x01ccu, 0x0002u, CanonicalizeSet },
- { 0x01cdu, 0x01dcu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x01ddu, 0x01ddu, 0x004fu, CanonicalizeRangeHi },
- { 0x01deu, 0x01efu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01f0u, 0x01f0u, 0x0000u, CanonicalizeUnique },
- { 0x01f1u, 0x01f3u, 0x0003u, CanonicalizeSet },
- { 0x01f4u, 0x01f5u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x01f6u, 0x01f6u, 0x0061u, CanonicalizeRangeHi },
- { 0x01f7u, 0x01f7u, 0x0038u, CanonicalizeRangeHi },
- { 0x01f8u, 0x021fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0220u, 0x0220u, 0x0082u, CanonicalizeRangeHi },
- { 0x0221u, 0x0221u, 0x0000u, CanonicalizeUnique },
- { 0x0222u, 0x0233u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0234u, 0x0239u, 0x0000u, CanonicalizeUnique },
- { 0x023au, 0x023au, 0x2a2bu, CanonicalizeRangeLo },
- { 0x023bu, 0x023cu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x023du, 0x023du, 0x00a3u, CanonicalizeRangeHi },
- { 0x023eu, 0x023eu, 0x2a28u, CanonicalizeRangeLo },
- { 0x023fu, 0x0240u, 0x2a3fu, CanonicalizeRangeLo },
- { 0x0241u, 0x0242u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x0243u, 0x0243u, 0x00c3u, CanonicalizeRangeHi },
- { 0x0244u, 0x0244u, 0x0045u, CanonicalizeRangeLo },
- { 0x0245u, 0x0245u, 0x0047u, CanonicalizeRangeLo },
- { 0x0246u, 0x024fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0250u, 0x0250u, 0x2a1fu, CanonicalizeRangeLo },
- { 0x0251u, 0x0251u, 0x2a1cu, CanonicalizeRangeLo },
- { 0x0252u, 0x0252u, 0x2a1eu, CanonicalizeRangeLo },
- { 0x0253u, 0x0253u, 0x00d2u, CanonicalizeRangeHi },
- { 0x0254u, 0x0254u, 0x00ceu, CanonicalizeRangeHi },
- { 0x0255u, 0x0255u, 0x0000u, CanonicalizeUnique },
- { 0x0256u, 0x0257u, 0x00cdu, CanonicalizeRangeHi },
- { 0x0258u, 0x0258u, 0x0000u, CanonicalizeUnique },
- { 0x0259u, 0x0259u, 0x00cau, CanonicalizeRangeHi },
- { 0x025au, 0x025au, 0x0000u, CanonicalizeUnique },
- { 0x025bu, 0x025bu, 0x00cbu, CanonicalizeRangeHi },
- { 0x025cu, 0x025fu, 0x0000u, CanonicalizeUnique },
- { 0x0260u, 0x0260u, 0x00cdu, CanonicalizeRangeHi },
- { 0x0261u, 0x0262u, 0x0000u, CanonicalizeUnique },
- { 0x0263u, 0x0263u, 0x00cfu, CanonicalizeRangeHi },
- { 0x0264u, 0x0264u, 0x0000u, CanonicalizeUnique },
- { 0x0265u, 0x0265u, 0xa528u, CanonicalizeRangeLo },
- { 0x0266u, 0x0267u, 0x0000u, CanonicalizeUnique },
- { 0x0268u, 0x0268u, 0x00d1u, CanonicalizeRangeHi },
- { 0x0269u, 0x0269u, 0x00d3u, CanonicalizeRangeHi },
- { 0x026au, 0x026au, 0x0000u, CanonicalizeUnique },
- { 0x026bu, 0x026bu, 0x29f7u, CanonicalizeRangeLo },
- { 0x026cu, 0x026eu, 0x0000u, CanonicalizeUnique },
- { 0x026fu, 0x026fu, 0x00d3u, CanonicalizeRangeHi },
- { 0x0270u, 0x0270u, 0x0000u, CanonicalizeUnique },
- { 0x0271u, 0x0271u, 0x29fdu, CanonicalizeRangeLo },
- { 0x0272u, 0x0272u, 0x00d5u, CanonicalizeRangeHi },
- { 0x0273u, 0x0274u, 0x0000u, CanonicalizeUnique },
- { 0x0275u, 0x0275u, 0x00d6u, CanonicalizeRangeHi },
- { 0x0276u, 0x027cu, 0x0000u, CanonicalizeUnique },
- { 0x027du, 0x027du, 0x29e7u, CanonicalizeRangeLo },
- { 0x027eu, 0x027fu, 0x0000u, CanonicalizeUnique },
- { 0x0280u, 0x0280u, 0x00dau, CanonicalizeRangeHi },
- { 0x0281u, 0x0282u, 0x0000u, CanonicalizeUnique },
- { 0x0283u, 0x0283u, 0x00dau, CanonicalizeRangeHi },
- { 0x0284u, 0x0287u, 0x0000u, CanonicalizeUnique },
- { 0x0288u, 0x0288u, 0x00dau, CanonicalizeRangeHi },
- { 0x0289u, 0x0289u, 0x0045u, CanonicalizeRangeHi },
- { 0x028au, 0x028bu, 0x00d9u, CanonicalizeRangeHi },
- { 0x028cu, 0x028cu, 0x0047u, CanonicalizeRangeHi },
- { 0x028du, 0x0291u, 0x0000u, CanonicalizeUnique },
- { 0x0292u, 0x0292u, 0x00dbu, CanonicalizeRangeHi },
- { 0x0293u, 0x0344u, 0x0000u, CanonicalizeUnique },
- { 0x0345u, 0x0345u, 0x0007u, CanonicalizeSet },
- { 0x0346u, 0x036fu, 0x0000u, CanonicalizeUnique },
- { 0x0370u, 0x0373u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0374u, 0x0375u, 0x0000u, CanonicalizeUnique },
- { 0x0376u, 0x0377u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0378u, 0x037au, 0x0000u, CanonicalizeUnique },
- { 0x037bu, 0x037du, 0x0082u, CanonicalizeRangeLo },
- { 0x037eu, 0x0385u, 0x0000u, CanonicalizeUnique },
- { 0x0386u, 0x0386u, 0x0026u, CanonicalizeRangeLo },
- { 0x0387u, 0x0387u, 0x0000u, CanonicalizeUnique },
- { 0x0388u, 0x038au, 0x0025u, CanonicalizeRangeLo },
- { 0x038bu, 0x038bu, 0x0000u, CanonicalizeUnique },
- { 0x038cu, 0x038cu, 0x0040u, CanonicalizeRangeLo },
- { 0x038du, 0x038du, 0x0000u, CanonicalizeUnique },
- { 0x038eu, 0x038fu, 0x003fu, CanonicalizeRangeLo },
- { 0x0390u, 0x0390u, 0x0000u, CanonicalizeUnique },
- { 0x0391u, 0x0391u, 0x0020u, CanonicalizeRangeLo },
- { 0x0392u, 0x0392u, 0x0004u, CanonicalizeSet },
- { 0x0393u, 0x0394u, 0x0020u, CanonicalizeRangeLo },
- { 0x0395u, 0x0395u, 0x0005u, CanonicalizeSet },
- { 0x0396u, 0x0397u, 0x0020u, CanonicalizeRangeLo },
- { 0x0398u, 0x0398u, 0x0006u, CanonicalizeSet },
- { 0x0399u, 0x0399u, 0x0007u, CanonicalizeSet },
- { 0x039au, 0x039au, 0x0008u, CanonicalizeSet },
- { 0x039bu, 0x039bu, 0x0020u, CanonicalizeRangeLo },
- { 0x039cu, 0x039cu, 0x0009u, CanonicalizeSet },
- { 0x039du, 0x039fu, 0x0020u, CanonicalizeRangeLo },
- { 0x03a0u, 0x03a0u, 0x000au, CanonicalizeSet },
- { 0x03a1u, 0x03a1u, 0x000bu, CanonicalizeSet },
- { 0x03a2u, 0x03a2u, 0x0000u, CanonicalizeUnique },
- { 0x03a3u, 0x03a3u, 0x000cu, CanonicalizeSet },
- { 0x03a4u, 0x03a5u, 0x0020u, CanonicalizeRangeLo },
- { 0x03a6u, 0x03a6u, 0x000du, CanonicalizeSet },
- { 0x03a7u, 0x03abu, 0x0020u, CanonicalizeRangeLo },
- { 0x03acu, 0x03acu, 0x0026u, CanonicalizeRangeHi },
- { 0x03adu, 0x03afu, 0x0025u, CanonicalizeRangeHi },
- { 0x03b0u, 0x03b0u, 0x0000u, CanonicalizeUnique },
- { 0x03b1u, 0x03b1u, 0x0020u, CanonicalizeRangeHi },
- { 0x03b2u, 0x03b2u, 0x0004u, CanonicalizeSet },
- { 0x03b3u, 0x03b4u, 0x0020u, CanonicalizeRangeHi },
- { 0x03b5u, 0x03b5u, 0x0005u, CanonicalizeSet },
- { 0x03b6u, 0x03b7u, 0x0020u, CanonicalizeRangeHi },
- { 0x03b8u, 0x03b8u, 0x0006u, CanonicalizeSet },
- { 0x03b9u, 0x03b9u, 0x0007u, CanonicalizeSet },
- { 0x03bau, 0x03bau, 0x0008u, CanonicalizeSet },
- { 0x03bbu, 0x03bbu, 0x0020u, CanonicalizeRangeHi },
- { 0x03bcu, 0x03bcu, 0x0009u, CanonicalizeSet },
- { 0x03bdu, 0x03bfu, 0x0020u, CanonicalizeRangeHi },
- { 0x03c0u, 0x03c0u, 0x000au, CanonicalizeSet },
- { 0x03c1u, 0x03c1u, 0x000bu, CanonicalizeSet },
- { 0x03c2u, 0x03c3u, 0x000cu, CanonicalizeSet },
- { 0x03c4u, 0x03c5u, 0x0020u, CanonicalizeRangeHi },
- { 0x03c6u, 0x03c6u, 0x000du, CanonicalizeSet },
- { 0x03c7u, 0x03cbu, 0x0020u, CanonicalizeRangeHi },
- { 0x03ccu, 0x03ccu, 0x0040u, CanonicalizeRangeHi },
- { 0x03cdu, 0x03ceu, 0x003fu, CanonicalizeRangeHi },
- { 0x03cfu, 0x03cfu, 0x0008u, CanonicalizeRangeLo },
- { 0x03d0u, 0x03d0u, 0x0004u, CanonicalizeSet },
- { 0x03d1u, 0x03d1u, 0x0006u, CanonicalizeSet },
- { 0x03d2u, 0x03d4u, 0x0000u, CanonicalizeUnique },
- { 0x03d5u, 0x03d5u, 0x000du, CanonicalizeSet },
- { 0x03d6u, 0x03d6u, 0x000au, CanonicalizeSet },
- { 0x03d7u, 0x03d7u, 0x0008u, CanonicalizeRangeHi },
- { 0x03d8u, 0x03efu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x03f0u, 0x03f0u, 0x0008u, CanonicalizeSet },
- { 0x03f1u, 0x03f1u, 0x000bu, CanonicalizeSet },
- { 0x03f2u, 0x03f2u, 0x0007u, CanonicalizeRangeLo },
- { 0x03f3u, 0x03f4u, 0x0000u, CanonicalizeUnique },
- { 0x03f5u, 0x03f5u, 0x0005u, CanonicalizeSet },
- { 0x03f6u, 0x03f6u, 0x0000u, CanonicalizeUnique },
- { 0x03f7u, 0x03f8u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x03f9u, 0x03f9u, 0x0007u, CanonicalizeRangeHi },
- { 0x03fau, 0x03fbu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x03fcu, 0x03fcu, 0x0000u, CanonicalizeUnique },
- { 0x03fdu, 0x03ffu, 0x0082u, CanonicalizeRangeHi },
- { 0x0400u, 0x040fu, 0x0050u, CanonicalizeRangeLo },
- { 0x0410u, 0x042fu, 0x0020u, CanonicalizeRangeLo },
- { 0x0430u, 0x044fu, 0x0020u, CanonicalizeRangeHi },
- { 0x0450u, 0x045fu, 0x0050u, CanonicalizeRangeHi },
- { 0x0460u, 0x0481u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0482u, 0x0489u, 0x0000u, CanonicalizeUnique },
- { 0x048au, 0x04bfu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x04c0u, 0x04c0u, 0x000fu, CanonicalizeRangeLo },
- { 0x04c1u, 0x04ceu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x04cfu, 0x04cfu, 0x000fu, CanonicalizeRangeHi },
- { 0x04d0u, 0x0527u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x0528u, 0x0530u, 0x0000u, CanonicalizeUnique },
- { 0x0531u, 0x0556u, 0x0030u, CanonicalizeRangeLo },
- { 0x0557u, 0x0560u, 0x0000u, CanonicalizeUnique },
- { 0x0561u, 0x0586u, 0x0030u, CanonicalizeRangeHi },
- { 0x0587u, 0x109fu, 0x0000u, CanonicalizeUnique },
- { 0x10a0u, 0x10c5u, 0x1c60u, CanonicalizeRangeLo },
- { 0x10c6u, 0x1d78u, 0x0000u, CanonicalizeUnique },
- { 0x1d79u, 0x1d79u, 0x8a04u, CanonicalizeRangeLo },
- { 0x1d7au, 0x1d7cu, 0x0000u, CanonicalizeUnique },
- { 0x1d7du, 0x1d7du, 0x0ee6u, CanonicalizeRangeLo },
- { 0x1d7eu, 0x1dffu, 0x0000u, CanonicalizeUnique },
- { 0x1e00u, 0x1e5fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x1e60u, 0x1e61u, 0x000eu, CanonicalizeSet },
- { 0x1e62u, 0x1e95u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x1e96u, 0x1e9au, 0x0000u, CanonicalizeUnique },
- { 0x1e9bu, 0x1e9bu, 0x000eu, CanonicalizeSet },
- { 0x1e9cu, 0x1e9fu, 0x0000u, CanonicalizeUnique },
- { 0x1ea0u, 0x1effu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x1f00u, 0x1f07u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f08u, 0x1f0fu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f10u, 0x1f15u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f16u, 0x1f17u, 0x0000u, CanonicalizeUnique },
- { 0x1f18u, 0x1f1du, 0x0008u, CanonicalizeRangeHi },
- { 0x1f1eu, 0x1f1fu, 0x0000u, CanonicalizeUnique },
- { 0x1f20u, 0x1f27u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f28u, 0x1f2fu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f30u, 0x1f37u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f38u, 0x1f3fu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f40u, 0x1f45u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f46u, 0x1f47u, 0x0000u, CanonicalizeUnique },
- { 0x1f48u, 0x1f4du, 0x0008u, CanonicalizeRangeHi },
- { 0x1f4eu, 0x1f50u, 0x0000u, CanonicalizeUnique },
- { 0x1f51u, 0x1f51u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f52u, 0x1f52u, 0x0000u, CanonicalizeUnique },
- { 0x1f53u, 0x1f53u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f54u, 0x1f54u, 0x0000u, CanonicalizeUnique },
- { 0x1f55u, 0x1f55u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f56u, 0x1f56u, 0x0000u, CanonicalizeUnique },
- { 0x1f57u, 0x1f57u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f58u, 0x1f58u, 0x0000u, CanonicalizeUnique },
- { 0x1f59u, 0x1f59u, 0x0008u, CanonicalizeRangeHi },
- { 0x1f5au, 0x1f5au, 0x0000u, CanonicalizeUnique },
- { 0x1f5bu, 0x1f5bu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f5cu, 0x1f5cu, 0x0000u, CanonicalizeUnique },
- { 0x1f5du, 0x1f5du, 0x0008u, CanonicalizeRangeHi },
- { 0x1f5eu, 0x1f5eu, 0x0000u, CanonicalizeUnique },
- { 0x1f5fu, 0x1f5fu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f60u, 0x1f67u, 0x0008u, CanonicalizeRangeLo },
- { 0x1f68u, 0x1f6fu, 0x0008u, CanonicalizeRangeHi },
- { 0x1f70u, 0x1f71u, 0x004au, CanonicalizeRangeLo },
- { 0x1f72u, 0x1f75u, 0x0056u, CanonicalizeRangeLo },
- { 0x1f76u, 0x1f77u, 0x0064u, CanonicalizeRangeLo },
- { 0x1f78u, 0x1f79u, 0x0080u, CanonicalizeRangeLo },
- { 0x1f7au, 0x1f7bu, 0x0070u, CanonicalizeRangeLo },
- { 0x1f7cu, 0x1f7du, 0x007eu, CanonicalizeRangeLo },
- { 0x1f7eu, 0x1fafu, 0x0000u, CanonicalizeUnique },
- { 0x1fb0u, 0x1fb1u, 0x0008u, CanonicalizeRangeLo },
- { 0x1fb2u, 0x1fb7u, 0x0000u, CanonicalizeUnique },
- { 0x1fb8u, 0x1fb9u, 0x0008u, CanonicalizeRangeHi },
- { 0x1fbau, 0x1fbbu, 0x004au, CanonicalizeRangeHi },
- { 0x1fbcu, 0x1fbdu, 0x0000u, CanonicalizeUnique },
- { 0x1fbeu, 0x1fbeu, 0x0007u, CanonicalizeSet },
- { 0x1fbfu, 0x1fc7u, 0x0000u, CanonicalizeUnique },
- { 0x1fc8u, 0x1fcbu, 0x0056u, CanonicalizeRangeHi },
- { 0x1fccu, 0x1fcfu, 0x0000u, CanonicalizeUnique },
- { 0x1fd0u, 0x1fd1u, 0x0008u, CanonicalizeRangeLo },
- { 0x1fd2u, 0x1fd7u, 0x0000u, CanonicalizeUnique },
- { 0x1fd8u, 0x1fd9u, 0x0008u, CanonicalizeRangeHi },
- { 0x1fdau, 0x1fdbu, 0x0064u, CanonicalizeRangeHi },
- { 0x1fdcu, 0x1fdfu, 0x0000u, CanonicalizeUnique },
- { 0x1fe0u, 0x1fe1u, 0x0008u, CanonicalizeRangeLo },
- { 0x1fe2u, 0x1fe4u, 0x0000u, CanonicalizeUnique },
- { 0x1fe5u, 0x1fe5u, 0x0007u, CanonicalizeRangeLo },
- { 0x1fe6u, 0x1fe7u, 0x0000u, CanonicalizeUnique },
- { 0x1fe8u, 0x1fe9u, 0x0008u, CanonicalizeRangeHi },
- { 0x1feau, 0x1febu, 0x0070u, CanonicalizeRangeHi },
- { 0x1fecu, 0x1fecu, 0x0007u, CanonicalizeRangeHi },
- { 0x1fedu, 0x1ff7u, 0x0000u, CanonicalizeUnique },
- { 0x1ff8u, 0x1ff9u, 0x0080u, CanonicalizeRangeHi },
- { 0x1ffau, 0x1ffbu, 0x007eu, CanonicalizeRangeHi },
- { 0x1ffcu, 0x2131u, 0x0000u, CanonicalizeUnique },
- { 0x2132u, 0x2132u, 0x001cu, CanonicalizeRangeLo },
- { 0x2133u, 0x214du, 0x0000u, CanonicalizeUnique },
- { 0x214eu, 0x214eu, 0x001cu, CanonicalizeRangeHi },
- { 0x214fu, 0x215fu, 0x0000u, CanonicalizeUnique },
- { 0x2160u, 0x216fu, 0x0010u, CanonicalizeRangeLo },
- { 0x2170u, 0x217fu, 0x0010u, CanonicalizeRangeHi },
- { 0x2180u, 0x2182u, 0x0000u, CanonicalizeUnique },
- { 0x2183u, 0x2184u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x2185u, 0x24b5u, 0x0000u, CanonicalizeUnique },
- { 0x24b6u, 0x24cfu, 0x001au, CanonicalizeRangeLo },
- { 0x24d0u, 0x24e9u, 0x001au, CanonicalizeRangeHi },
- { 0x24eau, 0x2bffu, 0x0000u, CanonicalizeUnique },
- { 0x2c00u, 0x2c2eu, 0x0030u, CanonicalizeRangeLo },
- { 0x2c2fu, 0x2c2fu, 0x0000u, CanonicalizeUnique },
- { 0x2c30u, 0x2c5eu, 0x0030u, CanonicalizeRangeHi },
- { 0x2c5fu, 0x2c5fu, 0x0000u, CanonicalizeUnique },
- { 0x2c60u, 0x2c61u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x2c62u, 0x2c62u, 0x29f7u, CanonicalizeRangeHi },
- { 0x2c63u, 0x2c63u, 0x0ee6u, CanonicalizeRangeHi },
- { 0x2c64u, 0x2c64u, 0x29e7u, CanonicalizeRangeHi },
- { 0x2c65u, 0x2c65u, 0x2a2bu, CanonicalizeRangeHi },
- { 0x2c66u, 0x2c66u, 0x2a28u, CanonicalizeRangeHi },
- { 0x2c67u, 0x2c6cu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x2c6du, 0x2c6du, 0x2a1cu, CanonicalizeRangeHi },
- { 0x2c6eu, 0x2c6eu, 0x29fdu, CanonicalizeRangeHi },
- { 0x2c6fu, 0x2c6fu, 0x2a1fu, CanonicalizeRangeHi },
- { 0x2c70u, 0x2c70u, 0x2a1eu, CanonicalizeRangeHi },
- { 0x2c71u, 0x2c71u, 0x0000u, CanonicalizeUnique },
- { 0x2c72u, 0x2c73u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x2c74u, 0x2c74u, 0x0000u, CanonicalizeUnique },
- { 0x2c75u, 0x2c76u, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x2c77u, 0x2c7du, 0x0000u, CanonicalizeUnique },
- { 0x2c7eu, 0x2c7fu, 0x2a3fu, CanonicalizeRangeHi },
- { 0x2c80u, 0x2ce3u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0x2ce4u, 0x2ceau, 0x0000u, CanonicalizeUnique },
- { 0x2cebu, 0x2ceeu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0x2cefu, 0x2cffu, 0x0000u, CanonicalizeUnique },
- { 0x2d00u, 0x2d25u, 0x1c60u, CanonicalizeRangeHi },
- { 0x2d26u, 0xa63fu, 0x0000u, CanonicalizeUnique },
- { 0xa640u, 0xa66du, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa66eu, 0xa67fu, 0x0000u, CanonicalizeUnique },
- { 0xa680u, 0xa697u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa698u, 0xa721u, 0x0000u, CanonicalizeUnique },
- { 0xa722u, 0xa72fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa730u, 0xa731u, 0x0000u, CanonicalizeUnique },
- { 0xa732u, 0xa76fu, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa770u, 0xa778u, 0x0000u, CanonicalizeUnique },
- { 0xa779u, 0xa77cu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0xa77du, 0xa77du, 0x8a04u, CanonicalizeRangeHi },
- { 0xa77eu, 0xa787u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa788u, 0xa78au, 0x0000u, CanonicalizeUnique },
- { 0xa78bu, 0xa78cu, 0x0000u, CanonicalizeAlternatingUnaligned },
- { 0xa78du, 0xa78du, 0xa528u, CanonicalizeRangeHi },
- { 0xa78eu, 0xa78fu, 0x0000u, CanonicalizeUnique },
- { 0xa790u, 0xa791u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa792u, 0xa79fu, 0x0000u, CanonicalizeUnique },
- { 0xa7a0u, 0xa7a9u, 0x0000u, CanonicalizeAlternatingAligned },
- { 0xa7aau, 0xff20u, 0x0000u, CanonicalizeUnique },
- { 0xff21u, 0xff3au, 0x0020u, CanonicalizeRangeLo },
- { 0xff3bu, 0xff40u, 0x0000u, CanonicalizeUnique },
- { 0xff41u, 0xff5au, 0x0020u, CanonicalizeRangeHi },
- { 0xff5bu, 0xffffu, 0x0000u, CanonicalizeUnique },
-};
-
-const size_t LATIN_CANONICALIZATION_RANGES = 20;
-LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {
- { 0x0000u, 0x0040u, 0x0000u, CanonicalizeLatinSelf },
- { 0x0041u, 0x005au, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x005bu, 0x0060u, 0x0000u, CanonicalizeLatinSelf },
- { 0x0061u, 0x007au, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x007bu, 0x00bfu, 0x0000u, CanonicalizeLatinSelf },
- { 0x00c0u, 0x00d6u, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x00d7u, 0x00d7u, 0x0000u, CanonicalizeLatinSelf },
- { 0x00d8u, 0x00deu, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x00dfu, 0x00dfu, 0x0000u, CanonicalizeLatinSelf },
- { 0x00e0u, 0x00f6u, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x00f7u, 0x00f7u, 0x0000u, CanonicalizeLatinSelf },
- { 0x00f8u, 0x00feu, 0x0000u, CanonicalizeLatinMask0x20 },
- { 0x00ffu, 0x00ffu, 0x0000u, CanonicalizeLatinSelf },
- { 0x0100u, 0x0177u, 0x0000u, CanonicalizeLatinInvalid },
- { 0x0178u, 0x0178u, 0x00ffu, CanonicalizeLatinOther },
- { 0x0179u, 0x039bu, 0x0000u, CanonicalizeLatinInvalid },
- { 0x039cu, 0x039cu, 0x00b5u, CanonicalizeLatinOther },
- { 0x039du, 0x03bbu, 0x0000u, CanonicalizeLatinInvalid },
- { 0x03bcu, 0x03bcu, 0x00b5u, CanonicalizeLatinOther },
- { 0x03bdu, 0xffffu, 0x0000u, CanonicalizeLatinInvalid },
-};
-
-} } // JSC::Yarr
-
diff --git a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.h b/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.h
deleted file mode 100644
index 9dce782..0000000
--- a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef YarrCanonicalizeUCS2_H
-#define YarrCanonicalizeUCS2_H
-
-#include <stdint.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace JSC { namespace Yarr {
-
-// This set of data (autogenerated using YarrCanonicalizeUCS2.js into YarrCanonicalizeUCS2.cpp)
-// provides information for each UCS2 code point as to the set of code points that it should
-// match under the ES5.1 case insensitive RegExp matching rules, specified in 15.10.2.8.
-enum UCS2CanonicalizationType {
- CanonicalizeUnique, // No canonically equal values, e.g. 0x0.
- CanonicalizeSet, // Value indicates a set in characterSetInfo.
- CanonicalizeRangeLo, // Value is positive delta to pair, E.g. 0x41 has value 0x20, -> 0x61.
- CanonicalizeRangeHi, // Value is positive delta to pair, E.g. 0x61 has value 0x20, -> 0x41.
- CanonicalizeAlternatingAligned, // Aligned consequtive pair, e.g. 0x1f4,0x1f5.
- CanonicalizeAlternatingUnaligned, // Unaligned consequtive pair, e.g. 0x241,0x242.
-};
-struct UCS2CanonicalizationRange { uint16_t begin, end, value, type; };
-extern const size_t UCS2_CANONICALIZATION_RANGES;
-extern uint16_t* characterSetInfo[];
-extern UCS2CanonicalizationRange rangeInfo[];
-
-// This table is similar to the full rangeInfo table, however this maps from UCS2 codepoints to
-// the set of Latin1 codepoints that could match.
-enum LatinCanonicalizationType {
- CanonicalizeLatinSelf, // This character is in the Latin1 range, but has no canonical equivalent in the range.
- CanonicalizeLatinMask0x20, // One of a pair of characters, under the mask 0x20.
- CanonicalizeLatinOther, // This character is not in the Latin1 range, but canonicalizes to another that is.
- CanonicalizeLatinInvalid, // Cannot match against Latin1 input.
-};
-struct LatinCanonicalizationRange { uint16_t begin, end, value, type; };
-extern const size_t LATIN_CANONICALIZATION_RANGES;
-extern LatinCanonicalizationRange latinRangeInfo[];
-
-// This searches in log2 time over ~364 entries, so should typically result in 8 compares.
-inline UCS2CanonicalizationRange* rangeInfoFor(UChar ch)
-{
- UCS2CanonicalizationRange* info = rangeInfo;
- size_t entries = UCS2_CANONICALIZATION_RANGES;
-
- while (true) {
- size_t candidate = entries >> 1;
- UCS2CanonicalizationRange* candidateInfo = info + candidate;
- if (ch < candidateInfo->begin)
- entries = candidate;
- else if (ch <= candidateInfo->end)
- return candidateInfo;
- else {
- info = candidateInfo + 1;
- entries -= (candidate + 1);
- }
- }
-}
-
-// Should only be called for characters that have one canonically matching value.
-inline UChar getCanonicalPair(UCS2CanonicalizationRange* info, UChar ch)
-{
- ASSERT(ch >= info->begin && ch <= info->end);
- switch (info->type) {
- case CanonicalizeRangeLo:
- return ch + info->value;
- case CanonicalizeRangeHi:
- return ch - info->value;
- case CanonicalizeAlternatingAligned:
- return ch ^ 1;
- case CanonicalizeAlternatingUnaligned:
- return ((ch - 1) ^ 1) + 1;
- default:
- RELEASE_ASSERT_NOT_REACHED();
- }
- RELEASE_ASSERT_NOT_REACHED();
- return 0;
-}
-
-// Returns true if no other UCS2 codepoint can match this value.
-inline bool isCanonicallyUnique(UChar ch)
-{
- return rangeInfoFor(ch)->type == CanonicalizeUnique;
-}
-
-// Returns true if values are equal, under the canonicalization rules.
-inline bool areCanonicallyEquivalent(UChar a, UChar b)
-{
- UCS2CanonicalizationRange* info = rangeInfoFor(a);
- switch (info->type) {
- case CanonicalizeUnique:
- return a == b;
- case CanonicalizeSet: {
- for (uint16_t* set = characterSetInfo[info->value]; (a = *set); ++set) {
- if (a == b)
- return true;
- }
- return false;
- }
- case CanonicalizeRangeLo:
- return (a == b) || (a + info->value == b);
- case CanonicalizeRangeHi:
- return (a == b) || (a - info->value == b);
- case CanonicalizeAlternatingAligned:
- return (a | 1) == (b | 1);
- case CanonicalizeAlternatingUnaligned:
- return ((a - 1) | 1) == ((b - 1) | 1);
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return false;
-}
-
-} } // JSC::Yarr
-
-#endif
diff --git a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.js b/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.js
deleted file mode 100644
index 98846cb..0000000
--- a/third_party/WebKit/Source/yarr/YarrCanonicalizeUCS2.js
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-// See ES 5.1, 15.10.2.8
-function canonicalize(ch)
-{
- var u = String.fromCharCode(ch).toUpperCase();
- if (u.length > 1)
- return ch;
- var cu = u.charCodeAt(0);
- if (ch >= 128 && cu < 128)
- return ch;
- return cu;
-}
-
-var MAX_UCS2 = 0xFFFF;
-var MAX_LATIN = 0xFF;
-
-var groupedCanonically = [];
-// Pass 1: populate groupedCanonically - this is mapping from canonicalized
-// values back to the set of character code that canonicalize to them.
-for (var i = 0; i <= MAX_UCS2; ++i) {
- var ch = canonicalize(i);
- if (!groupedCanonically[ch])
- groupedCanonically[ch] = [];
- groupedCanonically[ch].push(i);
-}
-
-var typeInfo = [];
-var latinTypeInfo = [];
-var characterSetInfo = [];
-// Pass 2: populate typeInfo & characterSetInfo. For every character calculate
-// a typeInfo value, described by the types above, and a value payload.
-for (cu in groupedCanonically) {
- // The set of characters that canonicalize to cu
- var characters = groupedCanonically[cu];
-
- // If there is only one, it is unique.
- if (characters.length == 1) {
- typeInfo[characters[0]] = "CanonicalizeUnique:0";
- latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? "CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0";
- continue;
- }
-
- // Sort the array.
- characters.sort(function(x,y){return x-y;});
-
- // If there are more than two characters, create an entry in characterSetInfo.
- if (characters.length > 2) {
- for (i in characters)
- typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length;
- characterSetInfo.push(characters);
-
- if (characters[1] <= MAX_LATIN)
- throw new Error("sets with more than one latin character not supported!");
- if (characters[0] <= MAX_LATIN) {
- for (i in characters)
- latinTypeInfo[characters[i]] = "CanonicalizeLatinOther:" + characters[0];
- latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0";
- } else {
- for (i in characters)
- latinTypeInfo[characters[i]] = "CanonicalizeLatinInvalid:0";
- }
-
- continue;
- }
-
- // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner.
- var lo = characters[0];
- var hi = characters[1];
- var delta = hi - lo;
- if (delta == 1) {
- var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
- typeInfo[lo] = type;
- typeInfo[hi] = type;
- } else {
- typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
- typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
- }
-
- if (lo > MAX_LATIN) {
- latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0";
- latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0";
- } else if (hi > MAX_LATIN) {
- latinTypeInfo[lo] = "CanonicalizeLatinSelf:0";
- latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo;
- } else {
- if (delta != 0x20 || lo & 0x20)
- throw new Error("pairs of latin characters that don't mask with 0x20 not supported!");
- latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0";
- latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0";
- }
-}
-
-var rangeInfo = [];
-// Pass 3: coallesce types into ranges.
-for (var end = 0; end <= MAX_UCS2; ++end) {
- var begin = end;
- var type = typeInfo[end];
- while (end < MAX_UCS2 && typeInfo[end + 1] == type)
- ++end;
- rangeInfo.push({begin:begin, end:end, type:type});
-}
-
-var latinRangeInfo = [];
-// Pass 4: coallesce latin-1 types into ranges.
-for (var end = 0; end <= MAX_UCS2; ++end) {
- var begin = end;
- var type = latinTypeInfo[end];
- while (end < MAX_UCS2 && latinTypeInfo[end + 1] == type)
- ++end;
- latinRangeInfo.push({begin:begin, end:end, type:type});
-}
-
-
-// Helper function to convert a number to a fixed width hex representation of a C uint16_t.
-function hex(x)
-{
- var s = Number(x).toString(16);
- while (s.length < 4)
- s = 0 + s;
- return "0x" + s + "u";
-}
-
-var copyright = (
- "/*" + "\n" +
- " * Copyright (C) 2012 Apple Inc. All rights reserved." + "\n" +
- " *" + "\n" +
- " * Redistribution and use in source and binary forms, with or without" + "\n" +
- " * modification, are permitted provided that the following conditions" + "\n" +
- " * are met:" + "\n" +
- " * 1. Redistributions of source code must retain the above copyright" + "\n" +
- " * notice, this list of conditions and the following disclaimer." + "\n" +
- " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" +
- " * notice, this list of conditions and the following disclaimer in the" + "\n" +
- " * documentation and/or other materials provided with the distribution." + "\n" +
- " *" + "\n" +
- " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" +
- " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" +
- " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" +
- " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" +
- " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" +
- " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" +
- " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" +
- " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" +
- " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" +
- " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" +
- " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" +
- " */");
-
-print(copyright);
-print();
-print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js");
-print();
-print('#include "config.h"');
-print('#include "yarr/YarrCanonicalizeUCS2.h"');
-print();
-print("namespace JSC { namespace Yarr {");
-print();
-print("#include <stdint.h>");
-print();
-
-for (i in characterSetInfo) {
- var characters = ""
- var set = characterSetInfo[i];
- for (var j in set)
- characters += hex(set[j]) + ", ";
- print("uint16_t ucs2CharacterSet" + i + "[] = { " + characters + "0 };");
-}
-print();
-print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
-print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {");
-for (i in characterSetInfo)
-print(" ucs2CharacterSet" + i + ",");
-print("};");
-print();
-print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
-print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {");
-for (i in rangeInfo) {
- var info = rangeInfo[i];
- var typeAndValue = info.type.split(':');
- print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
-}
-print("};");
-print();
-print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";");
-print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {");
-for (i in latinRangeInfo) {
- var info = latinRangeInfo[i];
- var typeAndValue = info.type.split(':');
- print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
-}
-print("};");
-print();
-print("} } // JSC::Yarr");
-print();
-
diff --git a/third_party/WebKit/Source/yarr/YarrInterpreter.cpp b/third_party/WebKit/Source/yarr/YarrInterpreter.cpp
deleted file mode 100644
index b9d5910..0000000
--- a/third_party/WebKit/Source/yarr/YarrInterpreter.cpp
+++ /dev/null
@@ -1,1959 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "yarr/YarrInterpreter.h"
-
-#include "yarr/Yarr.h"
-#include "yarr/YarrCanonicalizeUCS2.h"
-#include <wtf/BumpPointerAllocator.h>
-#include <wtf/DataLog.h>
-#include <wtf/text/CString.h>
-#include <wtf/text/WTFString.h>
-
-#ifndef NDEBUG
-#include <stdio.h>
-#endif
-
-using namespace WTF;
-
-namespace JSC { namespace Yarr {
-
-template<typename CharType>
-class Interpreter {
-public:
- struct ParenthesesDisjunctionContext;
-
- struct BackTrackInfoPatternCharacter {
- uintptr_t matchAmount;
- };
- struct BackTrackInfoCharacterClass {
- uintptr_t matchAmount;
- };
- struct BackTrackInfoBackReference {
- uintptr_t begin; // Not really needed for greedy quantifiers.
- uintptr_t matchAmount; // Not really needed for fixed quantifiers.
- };
- struct BackTrackInfoAlternative {
- uintptr_t offset;
- };
- struct BackTrackInfoParentheticalAssertion {
- uintptr_t begin;
- };
- struct BackTrackInfoParenthesesOnce {
- uintptr_t begin;
- };
- struct BackTrackInfoParenthesesTerminal {
- uintptr_t begin;
- };
- struct BackTrackInfoParentheses {
- uintptr_t matchAmount;
- ParenthesesDisjunctionContext* lastContext;
- };
-
- static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
- {
- context->next = backTrack->lastContext;
- backTrack->lastContext = context;
- ++backTrack->matchAmount;
- }
-
- static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
- {
- RELEASE_ASSERT(backTrack->matchAmount);
- RELEASE_ASSERT(backTrack->lastContext);
- backTrack->lastContext = backTrack->lastContext->next;
- --backTrack->matchAmount;
- }
-
- struct DisjunctionContext
- {
- DisjunctionContext()
- : term(0)
- {
- }
-
- void* operator new(size_t, void* where)
- {
- return where;
- }
-
- int term;
- unsigned matchBegin;
- unsigned matchEnd;
- uintptr_t frame[1];
- };
-
- DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
- {
- size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
- allocatorPool = allocatorPool->ensureCapacity(size);
- RELEASE_ASSERT(allocatorPool);
- return new (allocatorPool->alloc(size)) DisjunctionContext();
- }
-
- void freeDisjunctionContext(DisjunctionContext* context)
- {
- allocatorPool = allocatorPool->dealloc(context);
- }
-
- struct ParenthesesDisjunctionContext
- {
- ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
- : next(0)
- {
- unsigned firstSubpatternId = term.atom.subpatternId;
- unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
-
- for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
- subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
- output[(firstSubpatternId << 1) + i] = offsetNoMatch;
- }
-
- new (getDisjunctionContext(term)) DisjunctionContext();
- }
-
- void* operator new(size_t, void* where)
- {
- return where;
- }
-
- void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
- {
- for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
- output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
- }
-
- DisjunctionContext* getDisjunctionContext(ByteTerm& term)
- {
- return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
- }
-
- ParenthesesDisjunctionContext* next;
- unsigned subpatternBackup[1];
- };
-
- ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
- {
- size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
- allocatorPool = allocatorPool->ensureCapacity(size);
- RELEASE_ASSERT(allocatorPool);
- return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
- }
-
- void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
- {
- allocatorPool = allocatorPool->dealloc(context);
- }
-
- class InputStream {
- public:
- InputStream(const CharType* input, unsigned start, unsigned length)
- : input(input)
- , pos(start)
- , length(length)
- {
- }
-
- void next()
- {
- ++pos;
- }
-
- void rewind(unsigned amount)
- {
- ASSERT(pos >= amount);
- pos -= amount;
- }
-
- int read()
- {
- ASSERT(pos < length);
- if (pos < length)
- return input[pos];
- return -1;
- }
-
- int readPair()
- {
- ASSERT(pos + 1 < length);
- return input[pos] | input[pos + 1] << 16;
- }
-
- int readChecked(unsigned negativePositionOffest)
- {
- RELEASE_ASSERT(pos >= negativePositionOffest);
- unsigned p = pos - negativePositionOffest;
- ASSERT(p < length);
- return input[p];
- }
-
- int reread(unsigned from)
- {
- ASSERT(from < length);
- return input[from];
- }
-
- int prev()
- {
- ASSERT(!(pos > length));
- if (pos && length)
- return input[pos - 1];
- return -1;
- }
-
- unsigned getPos()
- {
- return pos;
- }
-
- void setPos(unsigned p)
- {
- pos = p;
- }
-
- bool atStart()
- {
- return pos == 0;
- }
-
- bool atEnd()
- {
- return pos == length;
- }
-
- unsigned end()
- {
- return length;
- }
-
- bool checkInput(unsigned count)
- {
- if (((pos + count) <= length) && ((pos + count) >= pos)) {
- pos += count;
- return true;
- }
- return false;
- }
-
- void uncheckInput(unsigned count)
- {
- RELEASE_ASSERT(pos >= count);
- pos -= count;
- }
-
- bool atStart(unsigned negativePositionOffest)
- {
- return pos == negativePositionOffest;
- }
-
- bool atEnd(unsigned negativePositionOffest)
- {
- RELEASE_ASSERT(pos >= negativePositionOffest);
- return (pos - negativePositionOffest) == length;
- }
-
- bool isAvailableInput(unsigned offset)
- {
- return (((pos + offset) <= length) && ((pos + offset) >= pos));
- }
-
- private:
- const CharType* input;
- unsigned pos;
- unsigned length;
- };
-
- bool testCharacterClass(CharacterClass* characterClass, int ch)
- {
- if (ch & 0xFF80) {
- for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
- if (ch == characterClass->m_matchesUnicode[i])
- return true;
- for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
- if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
- return true;
- } else {
- for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
- if (ch == characterClass->m_matches[i])
- return true;
- for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
- if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
- return true;
- }
-
- return false;
- }
-
- bool checkCharacter(int testChar, unsigned negativeInputOffset)
- {
- return testChar == input.readChecked(negativeInputOffset);
- }
-
- bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
- {
- int ch = input.readChecked(negativeInputOffset);
- return (loChar == ch) || (hiChar == ch);
- }
-
- bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
- {
- bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
- return invert ? !match : match;
- }
-
- bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
- {
- unsigned matchSize = (unsigned)(matchEnd - matchBegin);
-
- if (!input.checkInput(matchSize))
- return false;
-
- if (pattern->m_ignoreCase) {
- for (unsigned i = 0; i < matchSize; ++i) {
- int oldCh = input.reread(matchBegin + i);
- int ch = input.readChecked(negativeInputOffset + matchSize - i);
-
- if (oldCh == ch)
- continue;
-
- // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
- // unicode values are never allowed to match against ascii ones.
- if (isASCII(oldCh) || isASCII(ch)) {
- if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
- continue;
- } else if (areCanonicallyEquivalent(oldCh, ch))
- continue;
-
- input.uncheckInput(matchSize);
- return false;
- }
- } else {
- for (unsigned i = 0; i < matchSize; ++i) {
- if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
- input.uncheckInput(matchSize);
- return false;
- }
- }
- }
-
- return true;
- }
-
- bool matchAssertionBOL(ByteTerm& term)
- {
- return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
- }
-
- bool matchAssertionEOL(ByteTerm& term)
- {
- if (term.inputPosition)
- return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
-
- return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
- }
-
- bool matchAssertionWordBoundary(ByteTerm& term)
- {
- bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
- bool readIsWordchar;
- if (term.inputPosition)
- readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
- else
- readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
-
- bool wordBoundary = prevIsWordchar != readIsWordchar;
- return term.invert() ? !wordBoundary : wordBoundary;
- }
-
- bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
- {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
- {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
- if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
- return false;
- }
- return true;
- }
-
- case QuantifierGreedy: {
- unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- return true;
- }
-
- case QuantifierNonGreedy:
- backTrack->matchAmount = 0;
- return true;
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return false;
- }
-
- bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeCharacterClass);
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.uncheckInput(1);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
- ++backTrack->matchAmount;
- if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
- return true;
- }
- input.uncheckInput(backTrack->matchAmount);
- break;
- }
-
- return false;
- }
-
- bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeBackReference);
- BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
-
- unsigned matchBegin = output[(term.atom.subpatternId << 1)];
- unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
-
- // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
- // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
- // Eg.: /(a\1)/
- if (matchEnd == offsetNoMatch)
- return true;
-
- if (matchBegin == offsetNoMatch)
- return true;
-
- ASSERT(matchBegin <= matchEnd);
-
- if (matchBegin == matchEnd)
- return true;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- backTrack->begin = input.getPos();
- for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
- if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
- input.setPos(backTrack->begin);
- return false;
- }
- }
- return true;
- }
-
- case QuantifierGreedy: {
- unsigned matchAmount = 0;
- while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
- ++matchAmount;
- backTrack->matchAmount = matchAmount;
- return true;
- }
-
- case QuantifierNonGreedy:
- backTrack->begin = input.getPos();
- backTrack->matchAmount = 0;
- return true;
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return false;
- }
-
- bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeBackReference);
- BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
-
- unsigned matchBegin = output[(term.atom.subpatternId << 1)];
- unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
-
- if (matchBegin == offsetNoMatch)
- return false;
-
- ASSERT(matchBegin <= matchEnd);
-
- if (matchBegin == matchEnd)
- return false;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount:
- // for quantityCount == 1, could rewind.
- input.setPos(backTrack->begin);
- break;
-
- case QuantifierGreedy:
- if (backTrack->matchAmount) {
- --backTrack->matchAmount;
- input.rewind(matchEnd - matchBegin);
- return true;
- }
- break;
-
- case QuantifierNonGreedy:
- if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
- ++backTrack->matchAmount;
- return true;
- }
- input.setPos(backTrack->begin);
- break;
- }
-
- return false;
- }
-
- void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
- {
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
- output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
- }
- }
- void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
- {
- unsigned firstSubpatternId = term.atom.subpatternId;
- unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
- context->restoreOutput(output, firstSubpatternId, count);
- }
- JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
- {
- while (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
-
- JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch)
- return JSRegExpMatch;
-
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- return JSRegExpNoMatch;
- }
-
- bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy: {
- // set this speculatively; if we get to the parens end this will be true.
- backTrack->begin = input.getPos();
- break;
- }
- case QuantifierNonGreedy: {
- backTrack->begin = notFound;
- context->term += term.atom.parenthesesWidth;
- return true;
- }
- case QuantifierFixedCount:
- break;
- }
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
- }
-
- return true;
- }
-
- bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
- }
-
- if (term.atom.quantityType == QuantifierFixedCount)
- return true;
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
- return backTrack->begin != input.getPos();
- }
-
- bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- if (term.capture()) {
- unsigned subpatternId = term.atom.subpatternId;
- output[(subpatternId << 1)] = offsetNoMatch;
- output[(subpatternId << 1) + 1] = offsetNoMatch;
- }
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy:
- // if we backtrack to this point, there is another chance - try matching nothing.
- ASSERT(backTrack->begin != notFound);
- backTrack->begin = notFound;
- context->term += term.atom.parenthesesWidth;
- return true;
- case QuantifierNonGreedy:
- ASSERT(backTrack->begin != notFound);
- case QuantifierFixedCount:
- break;
- }
-
- return false;
- }
-
- bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
-
- switch (term.atom.quantityType) {
- case QuantifierGreedy:
- if (backTrack->begin == notFound) {
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
- case QuantifierNonGreedy:
- if (backTrack->begin == notFound) {
- backTrack->begin = input.getPos();
- if (term.capture()) {
- // Technically this access to inputPosition should be accessing the begin term's
- // inputPosition, but for repeats other than fixed these values should be
- // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
- ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
- ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
- unsigned subpatternId = term.atom.subpatternId;
- output[subpatternId << 1] = input.getPos() + term.inputPosition;
- }
- context->term -= term.atom.parenthesesWidth;
- return true;
- }
- case QuantifierFixedCount:
- break;
- }
-
- return false;
- }
-
- bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
- ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
- ASSERT(!term.capture());
-
- BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
- backTrack->begin = input.getPos();
- return true;
- }
-
- bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
-
- BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
- // Empty match is a failed match.
- if (backTrack->begin == input.getPos())
- return false;
-
- // Successful match! Okay, what's next? - loop around and try to match moar!
- context->term -= (term.atom.parenthesesWidth + 1);
- return true;
- }
-
- bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
- ASSERT(term.atom.quantityType == QuantifierGreedy);
- ASSERT(term.atom.quantityCount == quantifyInfinite);
- ASSERT(!term.capture());
-
- // If we backtrack to this point, we have failed to match this iteration of the parens.
- // Since this is greedy / zero minimum a failed is also accepted as a match!
- context->term += term.atom.parenthesesWidth;
- return true;
- }
-
- bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
- {
- // 'Terminal' parentheses are at the end of the regex, and as such a match past end
- // should always be returned as a successful match - we should never backtrack to here.
- RELEASE_ASSERT_NOT_REACHED();
- return false;
- }
-
- bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- backTrack->begin = input.getPos();
- return true;
- }
-
- bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- input.setPos(backTrack->begin);
-
- // We've reached the end of the parens; if they are inverted, this is failure.
- if (term.invert()) {
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
-
- return true;
- }
-
- bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
- ASSERT(term.atom.quantityCount == 1);
-
- // We've failed to match parens; if they are inverted, this is win!
- if (term.invert()) {
- context->term += term.atom.parenthesesWidth;
- return true;
- }
-
- return false;
- }
-
- bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
- ASSERT(term.atom.quantityCount == 1);
-
- BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
-
- input.setPos(backTrack->begin);
-
- context->term -= term.atom.parenthesesWidth;
- return false;
- }
-
- JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-
- BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
- ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
-
- backTrack->matchAmount = 0;
- backTrack->lastContext = 0;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- // While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
- // Try to do a match, and it it succeeds, add it to the list.
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- // The match failed; try to find an alternate point to carry on from.
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
- if (backtrackResult != JSRegExpMatch)
- return backtrackResult;
- }
- }
-
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- case QuantifierGreedy: {
- while (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
-
- break;
- }
- }
-
- if (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- case QuantifierNonGreedy:
- return JSRegExpMatch;
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- // Rules for backtracking differ depending on whether this is greedy or non-greedy.
- //
- // Greedy matches never should try just adding more - you should already have done
- // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
- // you backtrack an item off the list needs checking, since we'll never have matched
- // the one less case. Tracking forwards, still add as much as possible.
- //
- // Non-greedy, we've already done the one less case, so don't match on popping.
- // We haven't done the one more case, so always try to add that.
- //
- JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
- {
- ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
-
- BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
- ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
-
- switch (term.atom.quantityType) {
- case QuantifierFixedCount: {
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
-
- ParenthesesDisjunctionContext* context = 0;
- JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
-
- if (result != JSRegExpMatch)
- return result;
-
- // While we haven't yet reached our fixed limit,
- while (backTrack->matchAmount < term.atom.quantityCount) {
- // Try to do a match, and it it succeeds, add it to the list.
- context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
-
- if (result == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- // The match failed; try to find an alternate point to carry on from.
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
- if (backtrackResult != JSRegExpMatch)
- return backtrackResult;
- }
- }
-
- ASSERT(backTrack->matchAmount == term.atom.quantityCount);
- context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- case QuantifierGreedy: {
- if (!backTrack->matchAmount)
- return JSRegExpNoMatch;
-
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch) {
- while (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (parenthesesResult == JSRegExpMatch)
- appendParenthesesDisjunctionContext(backTrack, context);
- else {
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (parenthesesResult != JSRegExpNoMatch)
- return parenthesesResult;
-
- break;
- }
- }
- } else {
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- if (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- case QuantifierNonGreedy: {
- // If we've not reached the limit, try to add one more match.
- if (backTrack->matchAmount < term.atom.quantityCount) {
- ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
- if (result == JSRegExpMatch) {
- appendParenthesesDisjunctionContext(backTrack, context);
- recordParenthesesMatch(term, context);
- return JSRegExpMatch;
- }
-
- resetMatches(term, context);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- // Nope - okay backtrack looking for an alternative.
- while (backTrack->matchAmount) {
- ParenthesesDisjunctionContext* context = backTrack->lastContext;
- JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
- if (result == JSRegExpMatch) {
- // successful backtrack! we're back in the game!
- if (backTrack->matchAmount) {
- context = backTrack->lastContext;
- recordParenthesesMatch(term, context);
- }
- return JSRegExpMatch;
- }
-
- // pop a match off the stack
- resetMatches(term, context);
- popParenthesesDisjunctionContext(backTrack);
- freeParenthesesDisjunctionContext(context);
-
- if (result != JSRegExpNoMatch)
- return result;
- }
-
- return JSRegExpNoMatch;
- }
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
- {
- UNUSED_PARAM(term);
- unsigned matchBegin = context->matchBegin;
-
- if (matchBegin) {
- for (matchBegin--; true; matchBegin--) {
- if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
- ++matchBegin;
- break;
- }
-
- if (!matchBegin)
- break;
- }
- }
-
- unsigned matchEnd = input.getPos();
-
- for (; (matchEnd != input.end())
- && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
-
- if (((matchBegin && term.anchors.m_bol)
- || ((matchEnd != input.end()) && term.anchors.m_eol))
- && !pattern->m_multiline)
- return false;
-
- context->matchBegin = matchBegin;
- context->matchEnd = matchEnd;
- return true;
- }
-
-#define MATCH_NEXT() { ++context->term; goto matchAgain; }
-#define BACKTRACK() { --context->term; goto backtrack; }
-#define currentTerm() (disjunction->terms[context->term])
- JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
- {
- if (!--remainingMatchCount)
- return JSRegExpErrorHitLimit;
-
- if (btrack)
- BACKTRACK();
-
- context->matchBegin = input.getPos();
- context->term = 0;
-
- matchAgain:
- ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
-
- switch (currentTerm().type) {
- case ByteTerm::TypeSubpatternBegin:
- MATCH_NEXT();
- case ByteTerm::TypeSubpatternEnd:
- context->matchEnd = input.getPos();
- return JSRegExpMatch;
-
- case ByteTerm::TypeBodyAlternativeBegin:
- MATCH_NEXT();
- case ByteTerm::TypeBodyAlternativeDisjunction:
- case ByteTerm::TypeBodyAlternativeEnd:
- context->matchEnd = input.getPos();
- return JSRegExpMatch;
-
- case ByteTerm::TypeAlternativeBegin:
- MATCH_NEXT();
- case ByteTerm::TypeAlternativeDisjunction:
- case ByteTerm::TypeAlternativeEnd: {
- int offset = currentTerm().alternative.end;
- BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
- backTrack->offset = offset;
- context->term += offset;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypeAssertionBOL:
- if (matchAssertionBOL(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeAssertionEOL:
- if (matchAssertionEOL(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeAssertionWordBoundary:
- if (matchAssertionWordBoundary(currentTerm()))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypePatternCharacterOnce:
- case ByteTerm::TypePatternCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
- if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
- BACKTRACK();
- }
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCharacterGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
- if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCharacterNonGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- backTrack->matchAmount = 0;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypePatternCasedCharacterOnce:
- case ByteTerm::TypePatternCasedCharacterFixed: {
- for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
- if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
- BACKTRACK();
- }
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCasedCharacterGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- unsigned matchAmount = 0;
- while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
- if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
- input.uncheckInput(1);
- break;
- }
- ++matchAmount;
- }
- backTrack->matchAmount = matchAmount;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypePatternCasedCharacterNonGreedy: {
- BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
- backTrack->matchAmount = 0;
- MATCH_NEXT();
- }
-
- case ByteTerm::TypeCharacterClass:
- if (matchCharacterClass(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeBackReference:
- if (matchBackReference(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpattern: {
- JSRegExpResult result = matchParentheses(currentTerm(), context);
-
- if (result == JSRegExpMatch) {
- MATCH_NEXT();
- } else if (result != JSRegExpNoMatch)
- return result;
-
- BACKTRACK();
- }
- case ByteTerm::TypeParenthesesSubpatternOnceBegin:
- if (matchParenthesesOnceBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternOnceEnd:
- if (matchParenthesesOnceEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
- if (matchParenthesesTerminalBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
- if (matchParenthesesTerminalEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionBegin:
- if (matchParentheticalAssertionBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionEnd:
- if (matchParentheticalAssertionEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypeCheckInput:
- if (input.checkInput(currentTerm().checkInputCount))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypeUncheckInput:
- input.uncheckInput(currentTerm().checkInputCount);
- MATCH_NEXT();
-
- case ByteTerm::TypeDotStarEnclosure:
- if (matchDotStarEnclosure(currentTerm(), context))
- return JSRegExpMatch;
- BACKTRACK();
- }
-
- // We should never fall-through to here.
- RELEASE_ASSERT_NOT_REACHED();
-
- backtrack:
- ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
-
- switch (currentTerm().type) {
- case ByteTerm::TypeSubpatternBegin:
- return JSRegExpNoMatch;
- case ByteTerm::TypeSubpatternEnd:
- RELEASE_ASSERT_NOT_REACHED();
-
- case ByteTerm::TypeBodyAlternativeBegin:
- case ByteTerm::TypeBodyAlternativeDisjunction: {
- int offset = currentTerm().alternative.next;
- context->term += offset;
- if (offset > 0)
- MATCH_NEXT();
-
- if (input.atEnd())
- return JSRegExpNoMatch;
-
- input.next();
-
- context->matchBegin = input.getPos();
-
- if (currentTerm().alternative.onceThrough)
- context->term += currentTerm().alternative.next;
-
- MATCH_NEXT();
- }
- case ByteTerm::TypeBodyAlternativeEnd:
- RELEASE_ASSERT_NOT_REACHED();
-
- case ByteTerm::TypeAlternativeBegin:
- case ByteTerm::TypeAlternativeDisjunction: {
- int offset = currentTerm().alternative.next;
- context->term += offset;
- if (offset > 0)
- MATCH_NEXT();
- BACKTRACK();
- }
- case ByteTerm::TypeAlternativeEnd: {
- // We should never backtrack back into an alternative of the main body of the regex.
- BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
- unsigned offset = backTrack->offset;
- context->term -= offset;
- BACKTRACK();
- }
-
- case ByteTerm::TypeAssertionBOL:
- case ByteTerm::TypeAssertionEOL:
- case ByteTerm::TypeAssertionWordBoundary:
- BACKTRACK();
-
- case ByteTerm::TypePatternCharacterOnce:
- case ByteTerm::TypePatternCharacterFixed:
- case ByteTerm::TypePatternCharacterGreedy:
- case ByteTerm::TypePatternCharacterNonGreedy:
- if (backtrackPatternCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypePatternCasedCharacterOnce:
- case ByteTerm::TypePatternCasedCharacterFixed:
- case ByteTerm::TypePatternCasedCharacterGreedy:
- case ByteTerm::TypePatternCasedCharacterNonGreedy:
- if (backtrackPatternCasedCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeCharacterClass:
- if (backtrackCharacterClass(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeBackReference:
- if (backtrackBackReference(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpattern: {
- JSRegExpResult result = backtrackParentheses(currentTerm(), context);
-
- if (result == JSRegExpMatch) {
- MATCH_NEXT();
- } else if (result != JSRegExpNoMatch)
- return result;
-
- BACKTRACK();
- }
- case ByteTerm::TypeParenthesesSubpatternOnceBegin:
- if (backtrackParenthesesOnceBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternOnceEnd:
- if (backtrackParenthesesOnceEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
- if (backtrackParenthesesTerminalBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
- if (backtrackParenthesesTerminalEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionBegin:
- if (backtrackParentheticalAssertionBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionEnd:
- if (backtrackParentheticalAssertionEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
-
- case ByteTerm::TypeCheckInput:
- input.uncheckInput(currentTerm().checkInputCount);
- BACKTRACK();
-
- case ByteTerm::TypeUncheckInput:
- input.checkInput(currentTerm().checkInputCount);
- BACKTRACK();
-
- case ByteTerm::TypeDotStarEnclosure:
- RELEASE_ASSERT_NOT_REACHED();
- }
-
- RELEASE_ASSERT_NOT_REACHED();
- return JSRegExpErrorNoMatch;
- }
-
- JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
- {
- JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
-
- if (result == JSRegExpMatch) {
- while (context->matchBegin == context->matchEnd) {
- result = matchDisjunction(disjunction, context, true);
- if (result != JSRegExpMatch)
- return result;
- }
- return JSRegExpMatch;
- }
-
- return result;
- }
-
- unsigned interpret()
- {
- if (!input.isAvailableInput(0))
- return offsetNoMatch;
-
- for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
- output[i << 1] = offsetNoMatch;
-
- allocatorPool = pattern->m_allocator->startAllocator();
- RELEASE_ASSERT(allocatorPool);
-
- DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
-
- JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
- if (result == JSRegExpMatch) {
- output[0] = context->matchBegin;
- output[1] = context->matchEnd;
- }
-
- freeDisjunctionContext(context);
-
- pattern->m_allocator->stopAllocator();
-
- ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
- return output[0];
- }
-
- Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
- : pattern(pattern)
- , output(output)
- , input(input, start, length)
- , allocatorPool(0)
- , remainingMatchCount(matchLimit)
- {
- }
-
-private:
- BytecodePattern* pattern;
- unsigned* output;
- InputStream input;
- BumpPointerPool* allocatorPool;
- unsigned remainingMatchCount;
-};
-
-class ByteCompiler {
- struct ParenthesesStackEntry {
- unsigned beginTerm;
- unsigned savedAlternativeIndex;
- ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
- : beginTerm(beginTerm)
- , savedAlternativeIndex(savedAlternativeIndex)
- {
- }
- };
-
-public:
- ByteCompiler(YarrPattern& pattern)
- : m_pattern(pattern)
- {
- m_currentAlternativeIndex = 0;
- }
-
- PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
- {
- regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
- emitDisjunction(m_pattern.m_body);
- regexEnd();
-
- return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
- }
-
- void checkInput(unsigned count)
- {
- m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
- }
-
- void uncheckInput(unsigned count)
- {
- m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
- }
-
- void assertionBOL(unsigned inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
- }
-
- void assertionEOL(unsigned inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
- }
-
- void assertionWordBoundary(bool invert, unsigned inputPosition)
- {
- m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
- }
-
- void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- if (m_pattern.m_ignoreCase) {
- UChar lo = Unicode::toLower(ch);
- UChar hi = Unicode::toUpper(ch);
-
- if (lo != hi) {
- m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
- return;
- }
- }
-
- m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
- }
-
- void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
-
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- }
-
- void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- ASSERT(subpatternId);
-
- m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
-
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- }
-
- void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
- // then fix this up at the end! - simplifying this should make it much clearer.
- // https://bugs.webkit.org/show_bug.cgi?id=50136
-
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
- {
- int beginTerm = m_bodyDisjunction->terms.size();
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
- m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
-
- m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
- m_currentAlternativeIndex = beginTerm + 1;
- }
-
- void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
-
- bool invert = m_bodyDisjunction->terms[beginTerm].invert();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
- {
- m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
- }
-
- unsigned popParenthesesStack()
- {
- ASSERT(m_parenthesesStack.size());
- int stackEnd = m_parenthesesStack.size() - 1;
- unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
- m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
- m_parenthesesStack.shrink(stackEnd);
-
- ASSERT(beginTerm < m_bodyDisjunction->terms.size());
- ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
-
- return beginTerm;
- }
-
-#ifndef NDEBUG
- void dumpDisjunction(ByteDisjunction* disjunction)
- {
- dataLogF("ByteDisjunction(%p):\n\t", disjunction);
- for (unsigned i = 0; i < disjunction->terms.size(); ++i)
- dataLogF("{ %d } ", disjunction->terms[i].type);
- dataLogF("\n");
- }
-#endif
-
- void closeAlternative(int beginTerm)
- {
- int origBeginTerm = beginTerm;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
- int endIndex = m_bodyDisjunction->terms.size();
-
- unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
-
- if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
- m_bodyDisjunction->terms.remove(beginTerm);
- else {
- while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
- beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
- m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
-
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
- m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
- }
- }
-
- void closeBodyAlternative()
- {
- int beginTerm = 0;
- int origBeginTerm = 0;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
- int endIndex = m_bodyDisjunction->terms.size();
-
- unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
-
- while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
- beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
- m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
-
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
- m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
- }
-
- void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
-
- ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
-
- bool capture = parenthesesBegin.capture();
- unsigned subpatternId = parenthesesBegin.atom.subpatternId;
-
- unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
- OwnPtr<ByteDisjunction> parenthesesDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
-
- unsigned firstTermInParentheses = beginTerm + 1;
- parenthesesDisjunction->terms.reserveInitialCapacity(endTerm - firstTermInParentheses + 2);
-
- parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
- for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
- parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
- parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
-
- m_bodyDisjunction->terms.shrink(beginTerm);
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
- m_allParenthesesInfo.append(parenthesesDisjunction.release());
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
- }
-
- void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
-
- bool capture = m_bodyDisjunction->terms[beginTerm].capture();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- {
- unsigned beginTerm = popParenthesesStack();
- closeAlternative(beginTerm + 1);
- unsigned endTerm = m_bodyDisjunction->terms.size();
-
- ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
-
- bool capture = m_bodyDisjunction->terms[beginTerm].capture();
- unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
-
- m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
- m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
- m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
-
- m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
- m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
- m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
- }
-
- void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
- {
- m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
- m_bodyDisjunction->terms[0].frameLocation = 0;
- m_currentAlternativeIndex = 0;
- }
-
- void regexEnd()
- {
- closeBodyAlternative();
- }
-
- void alternativeBodyDisjunction(bool onceThrough)
- {
- int newAlternativeIndex = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
- m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
-
- m_currentAlternativeIndex = newAlternativeIndex;
- }
-
- void alternativeDisjunction()
- {
- int newAlternativeIndex = m_bodyDisjunction->terms.size();
- m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
- m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
-
- m_currentAlternativeIndex = newAlternativeIndex;
- }
-
- void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
- {
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
-
- PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
-
- if (alt) {
- if (disjunction == m_pattern.m_body)
- alternativeBodyDisjunction(alternative->onceThrough());
- else
- alternativeDisjunction();
- }
-
- unsigned minimumSize = alternative->m_minimumSize;
- ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
- unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
-
- if (countToCheck) {
- checkInput(countToCheck);
- currentCountAlreadyChecked += countToCheck;
- }
-
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
- PatternTerm& term = alternative->m_terms[i];
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- assertionBOL(currentCountAlreadyChecked - term.inputPosition);
- break;
-
- case PatternTerm::TypeAssertionEOL:
- assertionEOL(currentCountAlreadyChecked - term.inputPosition);
- break;
-
- case PatternTerm::TypeAssertionWordBoundary:
- assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
- break;
-
- case PatternTerm::TypePatternCharacter:
- atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeCharacterClass:
- atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeBackReference:
- atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypeParenthesesSubpattern: {
- unsigned disjunctionAlreadyCheckedCount = 0;
- if (term.quantityCount == 1 && !term.parentheses.isCopy) {
- unsigned alternativeFrameLocation = term.frameLocation;
- // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
- if (term.quantityType == QuantifierFixedCount)
- disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
- else
- alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
- } else if (term.parentheses.isTerminal) {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
- atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
- } else {
- unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
- atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
- atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
- }
- break;
- }
-
- case PatternTerm::TypeParentheticalAssertion: {
- unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
-
- ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
- unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
- unsigned uncheckAmount = 0;
- if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
- uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
- uncheckInput(uncheckAmount);
- currentCountAlreadyChecked -= uncheckAmount;
- }
-
- atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
- emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
- atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
- if (uncheckAmount) {
- checkInput(uncheckAmount);
- currentCountAlreadyChecked += uncheckAmount;
- }
- break;
- }
-
- case PatternTerm::TypeDotStarEnclosure:
- assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
- break;
- }
- }
- }
- }
-
-private:
- YarrPattern& m_pattern;
- OwnPtr<ByteDisjunction> m_bodyDisjunction;
- unsigned m_currentAlternativeIndex;
- Vector<ParenthesesStackEntry> m_parenthesesStack;
- Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
-};
-
-PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
-{
- return ByteCompiler(pattern).compile(allocator);
-}
-
-unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
-{
- if (input.is8Bit())
- return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
- return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
-}
-
-unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
-{
- return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
-}
-
-unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
-{
- return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
-}
-
-// These should be the same for both UChar & LChar.
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
-
-
-} }
diff --git a/third_party/WebKit/Source/yarr/YarrInterpreter.h b/third_party/WebKit/Source/yarr/YarrInterpreter.h
deleted file mode 100644
index b3ceb7f..0000000
--- a/third_party/WebKit/Source/yarr/YarrInterpreter.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef YarrInterpreter_h
-#define YarrInterpreter_h
-
-#include "yarr/YarrPattern.h"
-#include <wtf/PassOwnPtr.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WTF {
-class BumpPointerAllocator;
-}
-using WTF::BumpPointerAllocator;
-
-namespace JSC { namespace Yarr {
-
-class ByteDisjunction;
-
-struct ByteTerm {
- enum Type {
- TypeBodyAlternativeBegin,
- TypeBodyAlternativeDisjunction,
- TypeBodyAlternativeEnd,
- TypeAlternativeBegin,
- TypeAlternativeDisjunction,
- TypeAlternativeEnd,
- TypeSubpatternBegin,
- TypeSubpatternEnd,
- TypeAssertionBOL,
- TypeAssertionEOL,
- TypeAssertionWordBoundary,
- TypePatternCharacterOnce,
- TypePatternCharacterFixed,
- TypePatternCharacterGreedy,
- TypePatternCharacterNonGreedy,
- TypePatternCasedCharacterOnce,
- TypePatternCasedCharacterFixed,
- TypePatternCasedCharacterGreedy,
- TypePatternCasedCharacterNonGreedy,
- TypeCharacterClass,
- TypeBackReference,
- TypeParenthesesSubpattern,
- TypeParenthesesSubpatternOnceBegin,
- TypeParenthesesSubpatternOnceEnd,
- TypeParenthesesSubpatternTerminalBegin,
- TypeParenthesesSubpatternTerminalEnd,
- TypeParentheticalAssertionBegin,
- TypeParentheticalAssertionEnd,
- TypeCheckInput,
- TypeUncheckInput,
- TypeDotStarEnclosure,
- } type;
- union {
- struct {
- union {
- UChar patternCharacter;
- struct {
- UChar lo;
- UChar hi;
- } casedCharacter;
- CharacterClass* characterClass;
- unsigned subpatternId;
- };
- union {
- ByteDisjunction* parenthesesDisjunction;
- unsigned parenthesesWidth;
- };
- QuantifierType quantityType;
- unsigned quantityCount;
- } atom;
- struct {
- int next;
- int end;
- bool onceThrough;
- } alternative;
- struct {
- bool m_bol : 1;
- bool m_eol : 1;
- } anchors;
- unsigned checkInputCount;
- };
- unsigned frameLocation;
- bool m_capture : 1;
- bool m_invert : 1;
- unsigned inputPosition;
-
- ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- : frameLocation(frameLocation)
- , m_capture(false)
- , m_invert(false)
- {
- switch (quantityType) {
- case QuantifierFixedCount:
- type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
- break;
- case QuantifierGreedy:
- type = ByteTerm::TypePatternCharacterGreedy;
- break;
- case QuantifierNonGreedy:
- type = ByteTerm::TypePatternCharacterNonGreedy;
- break;
- }
-
- atom.patternCharacter = ch;
- atom.quantityType = quantityType;
- atom.quantityCount = quantityCount.unsafeGet();
- inputPosition = inputPos;
- }
-
- ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
- : frameLocation(frameLocation)
- , m_capture(false)
- , m_invert(false)
- {
- switch (quantityType) {
- case QuantifierFixedCount:
- type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
- break;
- case QuantifierGreedy:
- type = ByteTerm::TypePatternCasedCharacterGreedy;
- break;
- case QuantifierNonGreedy:
- type = ByteTerm::TypePatternCasedCharacterNonGreedy;
- break;
- }
-
- atom.casedCharacter.lo = lo;
- atom.casedCharacter.hi = hi;
- atom.quantityType = quantityType;
- atom.quantityCount = quantityCount.unsafeGet();
- inputPosition = inputPos;
- }
-
- ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
- : type(ByteTerm::TypeCharacterClass)
- , m_capture(false)
- , m_invert(invert)
- {
- atom.characterClass = characterClass;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
- : type(type)
- , m_capture(capture)
- , m_invert(false)
- {
- atom.subpatternId = subpatternId;
- atom.parenthesesDisjunction = parenthesesInfo;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- ByteTerm(Type type, bool invert = false)
- : type(type)
- , m_capture(false)
- , m_invert(invert)
- {
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- }
-
- ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
- : type(type)
- , m_capture(capture)
- , m_invert(invert)
- {
- atom.subpatternId = subpatternId;
- atom.quantityType = QuantifierFixedCount;
- atom.quantityCount = 1;
- inputPosition = inputPos;
- }
-
- static ByteTerm BOL(int inputPos)
- {
- ByteTerm term(TypeAssertionBOL);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm CheckInput(Checked<unsigned> count)
- {
- ByteTerm term(TypeCheckInput);
- term.checkInputCount = count.unsafeGet();
- return term;
- }
-
- static ByteTerm UncheckInput(Checked<unsigned> count)
- {
- ByteTerm term(TypeUncheckInput);
- term.checkInputCount = count.unsafeGet();
- return term;
- }
-
- static ByteTerm EOL(int inputPos)
- {
- ByteTerm term(TypeAssertionEOL);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm WordBoundary(bool invert, int inputPos)
- {
- ByteTerm term(TypeAssertionWordBoundary, invert);
- term.inputPosition = inputPos;
- return term;
- }
-
- static ByteTerm BackReference(unsigned subpatternId, int inputPos)
- {
- return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
- }
-
- static ByteTerm BodyAlternativeBegin(bool onceThrough)
- {
- ByteTerm term(TypeBodyAlternativeBegin);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = onceThrough;
- return term;
- }
-
- static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
- {
- ByteTerm term(TypeBodyAlternativeDisjunction);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = onceThrough;
- return term;
- }
-
- static ByteTerm BodyAlternativeEnd()
- {
- ByteTerm term(TypeBodyAlternativeEnd);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeBegin()
- {
- ByteTerm term(TypeAlternativeBegin);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeDisjunction()
- {
- ByteTerm term(TypeAlternativeDisjunction);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm AlternativeEnd()
- {
- ByteTerm term(TypeAlternativeEnd);
- term.alternative.next = 0;
- term.alternative.end = 0;
- term.alternative.onceThrough = false;
- return term;
- }
-
- static ByteTerm SubpatternBegin()
- {
- return ByteTerm(TypeSubpatternBegin);
- }
-
- static ByteTerm SubpatternEnd()
- {
- return ByteTerm(TypeSubpatternEnd);
- }
-
- static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
- {
- ByteTerm term(TypeDotStarEnclosure);
- term.anchors.m_bol = bolAnchor;
- term.anchors.m_eol = eolAnchor;
- return term;
- }
-
- bool invert()
- {
- return m_invert;
- }
-
- bool capture()
- {
- return m_capture;
- }
-};
-
-class ByteDisjunction {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
- : m_numSubpatterns(numSubpatterns)
- , m_frameSize(frameSize)
- {
- }
-
- Vector<ByteTerm> terms;
- unsigned m_numSubpatterns;
- unsigned m_frameSize;
-};
-
-struct BytecodePattern {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<OwnPtr<ByteDisjunction> >& parenthesesInfoToAdopt, YarrPattern& pattern, BumpPointerAllocator* allocator)
- : m_body(body)
- , m_ignoreCase(pattern.m_ignoreCase)
- , m_multiline(pattern.m_multiline)
- , m_allocator(allocator)
- {
- m_body->terms.shrinkToFit();
-
- newlineCharacterClass = pattern.newlineCharacterClass();
- wordcharCharacterClass = pattern.wordcharCharacterClass();
-
- m_allParenthesesInfo.swap(parenthesesInfoToAdopt);
- m_allParenthesesInfo.shrinkToFit();
-
- m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
- m_userCharacterClasses.shrinkToFit();
- }
-
- OwnPtr<ByteDisjunction> m_body;
- bool m_ignoreCase;
- bool m_multiline;
- // Each BytecodePattern is associated with a RegExp, each RegExp is associated
- // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
- BumpPointerAllocator* m_allocator;
-
- CharacterClass* newlineCharacterClass;
- CharacterClass* wordcharCharacterClass;
-
-private:
- Vector<OwnPtr<ByteDisjunction> > m_allParenthesesInfo;
- Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
-};
-
-PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
-unsigned interpret(BytecodePattern*, const String& input, unsigned start, unsigned* output);
-unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
-unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
-
-} } // namespace JSC::Yarr
-
-#endif // YarrInterpreter_h
diff --git a/third_party/WebKit/Source/yarr/YarrParser.h b/third_party/WebKit/Source/yarr/YarrParser.h
deleted file mode 100644
index 61b016a..0000000
--- a/third_party/WebKit/Source/yarr/YarrParser.h
+++ /dev/null
@@ -1,880 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef YarrParser_h
-#define YarrParser_h
-
-#include "yarr/Yarr.h"
-#include <wtf/ASCIICType.h>
-#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace JSC { namespace Yarr {
-
-#define REGEXP_ERROR_PREFIX "Invalid regular expression: "
-
-enum BuiltInCharacterClassID {
- DigitClassID,
- SpaceClassID,
- WordClassID,
- NewlineClassID,
-};
-
-// The Parser class should not be used directly - only via the Yarr::parse() method.
-template<class Delegate, typename CharType>
-class Parser {
-private:
- template<class FriendDelegate>
- friend const char* parse(FriendDelegate&, const String& pattern, unsigned backReferenceLimit);
-
- enum ErrorCode {
- NoError,
- PatternTooLarge,
- QuantifierOutOfOrder,
- QuantifierWithoutAtom,
- QuantifierTooLarge,
- MissingParentheses,
- ParenthesesUnmatched,
- ParenthesesTypeInvalid,
- CharacterClassUnmatched,
- CharacterClassOutOfOrder,
- EscapeUnterminated,
- NumberOfErrorCodes
- };
-
- /*
- * CharacterClassParserDelegate:
- *
- * The class CharacterClassParserDelegate is used in the parsing of character
- * classes. This class handles detection of character ranges. This class
- * implements enough of the delegate interface such that it can be passed to
- * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
- * to perform the parsing of escape characters in character sets.
- */
- class CharacterClassParserDelegate {
- public:
- CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
- : m_delegate(delegate)
- , m_err(err)
- , m_state(Empty)
- , m_character(0)
- {
- }
-
- /*
- * begin():
- *
- * Called at beginning of construction.
- */
- void begin(bool invert)
- {
- m_delegate.atomCharacterClassBegin(invert);
- }
-
- /*
- * atomPatternCharacter():
- *
- * This method is called either from parseCharacterClass() (for an unescaped
- * character in a character class), or from parseEscape(). In the former case
- * the value true will be passed for the argument 'hyphenIsRange', and in this
- * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
- * is different to /[a\-z]/).
- */
- void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
- {
- switch (m_state) {
- case AfterCharacterClass:
- // Following a builtin character class we need look out for a hyphen.
- // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
- // If we see a hyphen following a charater class then unlike usual
- // we'll report it to the delegate immediately, and put ourself into
- // a poisoned state. Any following calls to add another character or
- // character class will result in an error. (A hypen following a
- // character-class is itself valid, but only at the end of a regex).
- if (hyphenIsRange && ch == '-') {
- m_delegate.atomCharacterClassAtom('-');
- m_state = AfterCharacterClassHyphen;
- return;
- }
- // Otherwise just fall through - cached character so treat this as Empty.
-
- case Empty:
- m_character = ch;
- m_state = CachedCharacter;
- return;
-
- case CachedCharacter:
- if (hyphenIsRange && ch == '-')
- m_state = CachedCharacterHyphen;
- else {
- m_delegate.atomCharacterClassAtom(m_character);
- m_character = ch;
- }
- return;
-
- case CachedCharacterHyphen:
- if (ch < m_character) {
- m_err = CharacterClassOutOfOrder;
- return;
- }
- m_delegate.atomCharacterClassRange(m_character, ch);
- m_state = Empty;
- return;
-
- // See coment in atomBuiltInCharacterClass below.
- // This too is technically an error, per ECMA-262, and again we
- // we chose to allow this. Note a subtlely here that while we
- // diverge from the spec's definition of CharacterRange we do
- // remain in compliance with the grammar. For example, consider
- // the expression /[\d-a-z]/. We comply with the grammar in
- // this case by not allowing a-z to be matched as a range.
- case AfterCharacterClassHyphen:
- m_delegate.atomCharacterClassAtom(ch);
- m_state = Empty;
- return;
- }
- }
-
- /*
- * atomBuiltInCharacterClass():
- *
- * Adds a built-in character class, called by parseEscape().
- */
- void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
- {
- switch (m_state) {
- case CachedCharacter:
- // Flush the currently cached character, then fall through.
- m_delegate.atomCharacterClassAtom(m_character);
-
- case Empty:
- case AfterCharacterClass:
- m_state = AfterCharacterClass;
- m_delegate.atomCharacterClassBuiltIn(classID, invert);
- return;
-
- // If we hit either of these cases, we have an invalid range that
- // looks something like /[x-\d]/ or /[\d-\d]/.
- // According to ECMA-262 this should be a syntax error, but
- // empirical testing shows this to break teh webz. Instead we
- // comply with to the ECMA-262 grammar, and assume the grammar to
- // have matched the range correctly, but tweak our interpretation
- // of CharacterRange. Effectively we implicitly handle the hyphen
- // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
- case CachedCharacterHyphen:
- m_delegate.atomCharacterClassAtom(m_character);
- m_delegate.atomCharacterClassAtom('-');
- // fall through
- case AfterCharacterClassHyphen:
- m_delegate.atomCharacterClassBuiltIn(classID, invert);
- m_state = Empty;
- return;
- }
- }
-
- /*
- * end():
- *
- * Called at end of construction.
- */
- void end()
- {
- if (m_state == CachedCharacter)
- m_delegate.atomCharacterClassAtom(m_character);
- else if (m_state == CachedCharacterHyphen) {
- m_delegate.atomCharacterClassAtom(m_character);
- m_delegate.atomCharacterClassAtom('-');
- }
- m_delegate.atomCharacterClassEnd();
- }
-
- // parseEscape() should never call these delegate methods when
- // invoked with inCharacterClass set.
- NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); }
- NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); }
-
- private:
- Delegate& m_delegate;
- ErrorCode& m_err;
- enum CharacterClassConstructionState {
- Empty,
- CachedCharacter,
- CachedCharacterHyphen,
- AfterCharacterClass,
- AfterCharacterClassHyphen,
- } m_state;
- UChar m_character;
- };
-
- Parser(Delegate& delegate, const String& pattern, unsigned backReferenceLimit)
- : m_delegate(delegate)
- , m_backReferenceLimit(backReferenceLimit)
- , m_err(NoError)
- , m_data(pattern.getCharacters<CharType>())
- , m_size(pattern.length())
- , m_index(0)
- , m_parenthesesNestingDepth(0)
- {
- }
-
- /*
- * parseEscape():
- *
- * Helper for parseTokens() AND parseCharacterClass().
- * Unlike the other parser methods, this function does not report tokens
- * directly to the member delegate (m_delegate), instead tokens are
- * emitted to the delegate provided as an argument. In the case of atom
- * escapes, parseTokens() will call parseEscape() passing m_delegate as
- * an argument, and as such the escape will be reported to the delegate.
- *
- * However this method may also be used by parseCharacterClass(), in which
- * case a CharacterClassParserDelegate will be passed as the delegate that
- * tokens should be added to. A boolean flag is also provided to indicate
- * whether that an escape in a CharacterClass is being parsed (some parsing
- * rules change in this context).
- *
- * The boolean value returned by this method indicates whether the token
- * parsed was an atom (outside of a characted class \b and \B will be
- * interpreted as assertions).
- */
- template<bool inCharacterClass, class EscapeDelegate>
- bool parseEscape(EscapeDelegate& delegate)
- {
- ASSERT(!m_err);
- ASSERT(peek() == '\\');
- consume();
-
- if (atEndOfPattern()) {
- m_err = EscapeUnterminated;
- return false;
- }
-
- switch (peek()) {
- // Assertions
- case 'b':
- consume();
- if (inCharacterClass)
- delegate.atomPatternCharacter('\b');
- else {
- delegate.assertionWordBoundary(false);
- return false;
- }
- break;
- case 'B':
- consume();
- if (inCharacterClass)
- delegate.atomPatternCharacter('B');
- else {
- delegate.assertionWordBoundary(true);
- return false;
- }
- break;
-
- // CharacterClassEscape
- case 'd':
- consume();
- delegate.atomBuiltInCharacterClass(DigitClassID, false);
- break;
- case 's':
- consume();
- delegate.atomBuiltInCharacterClass(SpaceClassID, false);
- break;
- case 'w':
- consume();
- delegate.atomBuiltInCharacterClass(WordClassID, false);
- break;
- case 'D':
- consume();
- delegate.atomBuiltInCharacterClass(DigitClassID, true);
- break;
- case 'S':
- consume();
- delegate.atomBuiltInCharacterClass(SpaceClassID, true);
- break;
- case 'W':
- consume();
- delegate.atomBuiltInCharacterClass(WordClassID, true);
- break;
-
- // DecimalEscape
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9': {
- // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
- // First, try to parse this as backreference.
- if (!inCharacterClass) {
- ParseState state = saveState();
-
- unsigned backReference = consumeNumber();
- if (backReference <= m_backReferenceLimit) {
- delegate.atomBackReference(backReference);
- break;
- }
-
- restoreState(state);
- }
-
- // Not a backreference, and not octal.
- if (peek() >= '8') {
- delegate.atomPatternCharacter('\\');
- break;
- }
-
- // Fall-through to handle this as an octal escape.
- }
-
- // Octal escape
- case '0':
- delegate.atomPatternCharacter(consumeOctal());
- break;
-
- // ControlEscape
- case 'f':
- consume();
- delegate.atomPatternCharacter('\f');
- break;
- case 'n':
- consume();
- delegate.atomPatternCharacter('\n');
- break;
- case 'r':
- consume();
- delegate.atomPatternCharacter('\r');
- break;
- case 't':
- consume();
- delegate.atomPatternCharacter('\t');
- break;
- case 'v':
- consume();
- delegate.atomPatternCharacter('\v');
- break;
-
- // ControlLetter
- case 'c': {
- ParseState state = saveState();
- consume();
- if (!atEndOfPattern()) {
- int control = consume();
-
- // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
- if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
- delegate.atomPatternCharacter(control & 0x1f);
- break;
- }
- }
- restoreState(state);
- delegate.atomPatternCharacter('\\');
- break;
- }
-
- // HexEscape
- case 'x': {
- consume();
- int x = tryConsumeHex(2);
- if (x == -1)
- delegate.atomPatternCharacter('x');
- else
- delegate.atomPatternCharacter(x);
- break;
- }
-
- // UnicodeEscape
- case 'u': {
- consume();
- int u = tryConsumeHex(4);
- if (u == -1)
- delegate.atomPatternCharacter('u');
- else
- delegate.atomPatternCharacter(u);
- break;
- }
-
- // IdentityEscape
- default:
- delegate.atomPatternCharacter(consume());
- }
-
- return true;
- }
-
- /*
- * parseAtomEscape(), parseCharacterClassEscape():
- *
- * These methods alias to parseEscape().
- */
- bool parseAtomEscape()
- {
- return parseEscape<false>(m_delegate);
- }
- void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
- {
- parseEscape<true>(delegate);
- }
-
- /*
- * parseCharacterClass():
- *
- * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
- * to an instance of CharacterClassParserDelegate, to describe the character class to the
- * delegate.
- */
- void parseCharacterClass()
- {
- ASSERT(!m_err);
- ASSERT(peek() == '[');
- consume();
-
- CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
-
- characterClassConstructor.begin(tryConsume('^'));
-
- while (!atEndOfPattern()) {
- switch (peek()) {
- case ']':
- consume();
- characterClassConstructor.end();
- return;
-
- case '\\':
- parseCharacterClassEscape(characterClassConstructor);
- break;
-
- default:
- characterClassConstructor.atomPatternCharacter(consume(), true);
- }
-
- if (m_err)
- return;
- }
-
- m_err = CharacterClassUnmatched;
- }
-
- /*
- * parseParenthesesBegin():
- *
- * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
- */
- void parseParenthesesBegin()
- {
- ASSERT(!m_err);
- ASSERT(peek() == '(');
- consume();
-
- if (tryConsume('?')) {
- if (atEndOfPattern()) {
- m_err = ParenthesesTypeInvalid;
- return;
- }
-
- switch (consume()) {
- case ':':
- m_delegate.atomParenthesesSubpatternBegin(false);
- break;
-
- case '=':
- m_delegate.atomParentheticalAssertionBegin();
- break;
-
- case '!':
- m_delegate.atomParentheticalAssertionBegin(true);
- break;
-
- default:
- m_err = ParenthesesTypeInvalid;
- }
- } else
- m_delegate.atomParenthesesSubpatternBegin();
-
- ++m_parenthesesNestingDepth;
- }
-
- /*
- * parseParenthesesEnd():
- *
- * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
- */
- void parseParenthesesEnd()
- {
- ASSERT(!m_err);
- ASSERT(peek() == ')');
- consume();
-
- if (m_parenthesesNestingDepth > 0)
- m_delegate.atomParenthesesEnd();
- else
- m_err = ParenthesesUnmatched;
-
- --m_parenthesesNestingDepth;
- }
-
- /*
- * parseQuantifier():
- *
- * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
- */
- void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
- {
- ASSERT(!m_err);
- ASSERT(min <= max);
-
- if (min == UINT_MAX) {
- m_err = QuantifierTooLarge;
- return;
- }
-
- if (lastTokenWasAnAtom)
- m_delegate.quantifyAtom(min, max, !tryConsume('?'));
- else
- m_err = QuantifierWithoutAtom;
- }
-
- /*
- * parseTokens():
- *
- * This method loops over the input pattern reporting tokens to the delegate.
- * The method returns when a parse error is detected, or the end of the pattern
- * is reached. One piece of state is tracked around the loop, which is whether
- * the last token passed to the delegate was an atom (this is necessary to detect
- * a parse error when a quantifier provided without an atom to quantify).
- */
- void parseTokens()
- {
- bool lastTokenWasAnAtom = false;
-
- while (!atEndOfPattern()) {
- switch (peek()) {
- case '|':
- consume();
- m_delegate.disjunction();
- lastTokenWasAnAtom = false;
- break;
-
- case '(':
- parseParenthesesBegin();
- lastTokenWasAnAtom = false;
- break;
-
- case ')':
- parseParenthesesEnd();
- lastTokenWasAnAtom = true;
- break;
-
- case '^':
- consume();
- m_delegate.assertionBOL();
- lastTokenWasAnAtom = false;
- break;
-
- case '$':
- consume();
- m_delegate.assertionEOL();
- lastTokenWasAnAtom = false;
- break;
-
- case '.':
- consume();
- m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
- lastTokenWasAnAtom = true;
- break;
-
- case '[':
- parseCharacterClass();
- lastTokenWasAnAtom = true;
- break;
-
- case '\\':
- lastTokenWasAnAtom = parseAtomEscape();
- break;
-
- case '*':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
- lastTokenWasAnAtom = false;
- break;
-
- case '+':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
- lastTokenWasAnAtom = false;
- break;
-
- case '?':
- consume();
- parseQuantifier(lastTokenWasAnAtom, 0, 1);
- lastTokenWasAnAtom = false;
- break;
-
- case '{': {
- ParseState state = saveState();
-
- consume();
- if (peekIsDigit()) {
- unsigned min = consumeNumber();
- unsigned max = min;
-
- if (tryConsume(','))
- max = peekIsDigit() ? consumeNumber() : quantifyInfinite;
-
- if (tryConsume('}')) {
- if (min <= max)
- parseQuantifier(lastTokenWasAnAtom, min, max);
- else
- m_err = QuantifierOutOfOrder;
- lastTokenWasAnAtom = false;
- break;
- }
- }
-
- restoreState(state);
- } // if we did not find a complete quantifer, fall through to the default case.
-
- default:
- m_delegate.atomPatternCharacter(consume());
- lastTokenWasAnAtom = true;
- }
-
- if (m_err)
- return;
- }
-
- if (m_parenthesesNestingDepth > 0)
- m_err = MissingParentheses;
- }
-
- /*
- * parse():
- *
- * This method calls parseTokens() to parse over the input and converts any
- * error code to a const char* for a result.
- */
- const char* parse()
- {
- if (m_size > MAX_PATTERN_SIZE)
- m_err = PatternTooLarge;
- else
- parseTokens();
- ASSERT(atEndOfPattern() || m_err);
-
- // The order of this array must match the ErrorCode enum.
- static const char* errorMessages[NumberOfErrorCodes] = {
- 0, // NoError
- REGEXP_ERROR_PREFIX "regular expression too large",
- REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier",
- REGEXP_ERROR_PREFIX "nothing to repeat",
- REGEXP_ERROR_PREFIX "number too large in {} quantifier",
- REGEXP_ERROR_PREFIX "missing )",
- REGEXP_ERROR_PREFIX "unmatched parentheses",
- REGEXP_ERROR_PREFIX "unrecognized character after (?",
- REGEXP_ERROR_PREFIX "missing terminating ] for character class",
- REGEXP_ERROR_PREFIX "range out of order in character class",
- REGEXP_ERROR_PREFIX "\\ at end of pattern"
- };
-
- return errorMessages[m_err];
- }
-
- // Misc helper functions:
-
- typedef unsigned ParseState;
-
- ParseState saveState()
- {
- return m_index;
- }
-
- void restoreState(ParseState state)
- {
- m_index = state;
- }
-
- bool atEndOfPattern()
- {
- ASSERT(m_index <= m_size);
- return m_index == m_size;
- }
-
- int peek()
- {
- ASSERT(m_index < m_size);
- return m_data[m_index];
- }
-
- bool peekIsDigit()
- {
- return !atEndOfPattern() && WTF::isASCIIDigit(peek());
- }
-
- unsigned peekDigit()
- {
- ASSERT(peekIsDigit());
- return peek() - '0';
- }
-
- int consume()
- {
- ASSERT(m_index < m_size);
- return m_data[m_index++];
- }
-
- unsigned consumeDigit()
- {
- ASSERT(peekIsDigit());
- return consume() - '0';
- }
-
- unsigned consumeNumber()
- {
- unsigned n = consumeDigit();
- // check for overflow.
- for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
- n = newValue;
- consume();
- }
- return n;
- }
-
- unsigned consumeOctal()
- {
- ASSERT(WTF::isASCIIOctalDigit(peek()));
-
- unsigned n = consumeDigit();
- while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
- n = n * 8 + consumeDigit();
- return n;
- }
-
- bool tryConsume(UChar ch)
- {
- if (atEndOfPattern() || (m_data[m_index] != ch))
- return false;
- ++m_index;
- return true;
- }
-
- int tryConsumeHex(int count)
- {
- ParseState state = saveState();
-
- int n = 0;
- while (count--) {
- if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
- restoreState(state);
- return -1;
- }
- n = (n << 4) | WTF::toASCIIHexValue(consume());
- }
- return n;
- }
-
- Delegate& m_delegate;
- unsigned m_backReferenceLimit;
- ErrorCode m_err;
- const CharType* m_data;
- unsigned m_size;
- unsigned m_index;
- unsigned m_parenthesesNestingDepth;
-
- // Derived by empirical testing of compile time in PCRE and WREC.
- static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
-};
-
-/*
- * Yarr::parse():
- *
- * The parse method is passed a pattern to be parsed and a delegate upon which
- * callbacks will be made to record the parsed tokens forming the regex.
- * Yarr::parse() returns null on success, or a const C string providing an error
- * message where a parse error occurs.
- *
- * The Delegate must implement the following interface:
- *
- * void assertionBOL();
- * void assertionEOL();
- * void assertionWordBoundary(bool invert);
- *
- * void atomPatternCharacter(UChar ch);
- * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
- * void atomCharacterClassBegin(bool invert)
- * void atomCharacterClassAtom(UChar ch)
- * void atomCharacterClassRange(UChar begin, UChar end)
- * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
- * void atomCharacterClassEnd()
- * void atomParenthesesSubpatternBegin(bool capture = true);
- * void atomParentheticalAssertionBegin(bool invert = false);
- * void atomParenthesesEnd();
- * void atomBackReference(unsigned subpatternId);
- *
- * void quantifyAtom(unsigned min, unsigned max, bool greedy);
- *
- * void disjunction();
- *
- * The regular expression is described by a sequence of assertion*() and atom*()
- * callbacks to the delegate, describing the terms in the regular expression.
- * Following an atom a quantifyAtom() call may occur to indicate that the previous
- * atom should be quantified. In the case of atoms described across multiple
- * calls (parentheses and character classes) the call to quantifyAtom() will come
- * after the call to the atom*End() method, never after atom*Begin().
- *
- * Character classes may either be described by a single call to
- * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
- * In the latter case, ...Begin() will be called, followed by a sequence of
- * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
- *
- * Sequences of atoms and assertions are broken into alternatives via calls to
- * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
- * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
- * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
- * capturing subpattern, this will be the subpatternId associated with these
- * parentheses, and will also by definition be the lowest subpatternId of these
- * parentheses and of any nested paretheses. The atomParenthesesEnd() method
- * is passed the subpatternId of the last capturing subexpression nested within
- * these paretheses. In the case of a capturing subpattern with no nested
- * capturing subpatterns, the same subpatternId will be passed to the begin and
- * end functions. In the case of non-capturing subpatterns the subpatternId
- * passed to the begin method is also the first possible subpatternId that might
- * be nested within these paretheses. If a set of non-capturing parentheses does
- * not contain any capturing subpatterns, then the subpatternId passed to begin
- * will be greater than the subpatternId passed to end.
- */
-
-template<class Delegate>
-const char* parse(Delegate& delegate, const String& pattern, unsigned backReferenceLimit = quantifyInfinite)
-{
- if (pattern.is8Bit())
- return Parser<Delegate, LChar>(delegate, pattern, backReferenceLimit).parse();
- return Parser<Delegate, UChar>(delegate, pattern, backReferenceLimit).parse();
-}
-
-} } // namespace JSC::Yarr
-
-#endif // YarrParser_h
diff --git a/third_party/WebKit/Source/yarr/YarrPattern.cpp b/third_party/WebKit/Source/yarr/YarrPattern.cpp
deleted file mode 100644
index 978c074..0000000
--- a/third_party/WebKit/Source/yarr/YarrPattern.cpp
+++ /dev/null
@@ -1,880 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "yarr/YarrPattern.h"
-
-#include "yarr/Yarr.h"
-#include "yarr/YarrCanonicalizeUCS2.h"
-#include "yarr/YarrParser.h"
-#include <wtf/Vector.h>
-
-using namespace WTF;
-
-namespace JSC { namespace Yarr {
-
-#include "RegExpJitTables.h"
-
-class CharacterClassConstructor {
-public:
- CharacterClassConstructor(bool isCaseInsensitive = false)
- : m_isCaseInsensitive(isCaseInsensitive)
- {
- }
-
- void reset()
- {
- m_matches.clear();
- m_ranges.clear();
- m_matchesUnicode.clear();
- m_rangesUnicode.clear();
- }
-
- void append(const CharacterClass* other)
- {
- for (size_t i = 0; i < other->m_matches.size(); ++i)
- addSorted(m_matches, other->m_matches[i]);
- for (size_t i = 0; i < other->m_ranges.size(); ++i)
- addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
- for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
- addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
- for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
- addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
- }
-
- void putChar(UChar ch)
- {
- // Handle ascii cases.
- if (ch <= 0x7f) {
- if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
- addSorted(m_matches, toASCIIUpper(ch));
- addSorted(m_matches, toASCIILower(ch));
- } else
- addSorted(m_matches, ch);
- return;
- }
-
- // Simple case, not a case-insensitive match.
- if (!m_isCaseInsensitive) {
- addSorted(m_matchesUnicode, ch);
- return;
- }
-
- // Add multiple matches, if necessary.
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
- if (info->type == CanonicalizeUnique)
- addSorted(m_matchesUnicode, ch);
- else
- putUnicodeIgnoreCase(ch, info);
- }
-
- void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info)
- {
- ASSERT(m_isCaseInsensitive);
- ASSERT(ch > 0x7f);
- ASSERT(ch >= info->begin && ch <= info->end);
- ASSERT(info->type != CanonicalizeUnique);
- if (info->type == CanonicalizeSet) {
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
- addSorted(m_matchesUnicode, ch);
- } else {
- addSorted(m_matchesUnicode, ch);
- addSorted(m_matchesUnicode, getCanonicalPair(info, ch));
- }
- }
-
- void putRange(UChar lo, UChar hi)
- {
- if (lo <= 0x7f) {
- char asciiLo = lo;
- char asciiHi = std::min(hi, (UChar)0x7f);
- addSortedRange(m_ranges, lo, asciiHi);
-
- if (m_isCaseInsensitive) {
- if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
- addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
- if ((asciiLo <= 'z') && (asciiHi >= 'a'))
- addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
- }
- }
- if (hi <= 0x7f)
- return;
-
- lo = std::max(lo, (UChar)0x80);
- addSortedRange(m_rangesUnicode, lo, hi);
-
- if (!m_isCaseInsensitive)
- return;
-
- UCS2CanonicalizationRange* info = rangeInfoFor(lo);
- while (true) {
- // Handle the range [lo .. end]
- UChar end = std::min<UChar>(info->end, hi);
-
- switch (info->type) {
- case CanonicalizeUnique:
- // Nothing to do - no canonical equivalents.
- break;
- case CanonicalizeSet: {
- UChar ch;
- for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
- addSorted(m_matchesUnicode, ch);
- break;
- }
- case CanonicalizeRangeLo:
- addSortedRange(m_rangesUnicode, lo + info->value, end + info->value);
- break;
- case CanonicalizeRangeHi:
- addSortedRange(m_rangesUnicode, lo - info->value, end - info->value);
- break;
- case CanonicalizeAlternatingAligned:
- // Use addSortedRange since there is likely an abutting range to combine with.
- if (lo & 1)
- addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
- if (!(end & 1))
- addSortedRange(m_rangesUnicode, end + 1, end + 1);
- break;
- case CanonicalizeAlternatingUnaligned:
- // Use addSortedRange since there is likely an abutting range to combine with.
- if (!(lo & 1))
- addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
- if (end & 1)
- addSortedRange(m_rangesUnicode, end + 1, end + 1);
- break;
- }
-
- if (hi == end)
- return;
-
- ++info;
- lo = info->begin;
- };
-
- }
-
- PassOwnPtr<CharacterClass> charClass()
- {
- OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass(0));
-
- characterClass->m_matches.swap(m_matches);
- characterClass->m_ranges.swap(m_ranges);
- characterClass->m_matchesUnicode.swap(m_matchesUnicode);
- characterClass->m_rangesUnicode.swap(m_rangesUnicode);
-
- return characterClass.release();
- }
-
-private:
- void addSorted(Vector<UChar>& matches, UChar ch)
- {
- unsigned pos = 0;
- unsigned range = matches.size();
-
- // binary chop, find position to insert char.
- while (range) {
- unsigned index = range >> 1;
-
- int val = matches[pos+index] - ch;
- if (!val)
- return;
- else if (val > 0)
- range = index;
- else {
- pos += (index+1);
- range -= (index+1);
- }
- }
-
- if (pos == matches.size())
- matches.append(ch);
- else
- matches.insert(pos, ch);
- }
-
- void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
- {
- unsigned end = ranges.size();
-
- // Simple linear scan - I doubt there are that many ranges anyway...
- // feel free to fix this with something faster (eg binary chop).
- for (unsigned i = 0; i < end; ++i) {
- // does the new range fall before the current position in the array
- if (hi < ranges[i].begin) {
- // optional optimization: concatenate appending ranges? - may not be worthwhile.
- if (hi == (ranges[i].begin - 1)) {
- ranges[i].begin = lo;
- return;
- }
- ranges.insert(i, CharacterRange(lo, hi));
- return;
- }
- // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
- // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
- // end of the last range they concatenate, which is just as good.
- if (lo <= (ranges[i].end + 1)) {
- // found an intersect! we'll replace this entry in the array.
- ranges[i].begin = std::min(ranges[i].begin, lo);
- ranges[i].end = std::max(ranges[i].end, hi);
-
- // now check if the new range can subsume any subsequent ranges.
- unsigned next = i+1;
- // each iteration of the loop we will either remove something from the list, or break the loop.
- while (next < ranges.size()) {
- if (ranges[next].begin <= (ranges[i].end + 1)) {
- // the next entry now overlaps / concatenates this one.
- ranges[i].end = std::max(ranges[i].end, ranges[next].end);
- ranges.remove(next);
- } else
- break;
- }
-
- return;
- }
- }
-
- // CharacterRange comes after all existing ranges.
- ranges.append(CharacterRange(lo, hi));
- }
-
- bool m_isCaseInsensitive;
-
- Vector<UChar> m_matches;
- Vector<CharacterRange> m_ranges;
- Vector<UChar> m_matchesUnicode;
- Vector<CharacterRange> m_rangesUnicode;
-};
-
-class YarrPatternConstructor {
-public:
- YarrPatternConstructor(YarrPattern& pattern)
- : m_pattern(pattern)
- , m_characterClassConstructor(pattern.m_ignoreCase)
- , m_invertParentheticalAssertion(false)
- {
- OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
- m_pattern.m_body = body.get();
- m_alternative = body->addNewAlternative();
- m_pattern.m_disjunctions.append(body.release());
- }
-
- ~YarrPatternConstructor()
- {
- }
-
- void reset()
- {
- m_pattern.reset();
- m_characterClassConstructor.reset();
-
- OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
- m_pattern.m_body = body.get();
- m_alternative = body->addNewAlternative();
- m_pattern.m_disjunctions.append(body.release());
- }
-
- void assertionBOL()
- {
- if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
- m_alternative->m_startsWithBOL = true;
- m_alternative->m_containsBOL = true;
- m_pattern.m_containsBOL = true;
- }
- m_alternative->m_terms.append(PatternTerm::BOL());
- }
- void assertionEOL()
- {
- m_alternative->m_terms.append(PatternTerm::EOL());
- }
- void assertionWordBoundary(bool invert)
- {
- m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
- }
-
- void atomPatternCharacter(UChar ch)
- {
- // We handle case-insensitive checking of unicode characters which do have both
- // cases by handling them as if they were defined using a CharacterClass.
- if (!m_pattern.m_ignoreCase || isASCII(ch)) {
- m_alternative->m_terms.append(PatternTerm(ch));
- return;
- }
-
- UCS2CanonicalizationRange* info = rangeInfoFor(ch);
- if (info->type == CanonicalizeUnique) {
- m_alternative->m_terms.append(PatternTerm(ch));
- return;
- }
-
- m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
- OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
- m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false));
- m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
- }
-
- void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
- {
- switch (classID) {
- case DigitClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
- break;
- case SpaceClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
- break;
- case WordClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
- break;
- case NewlineClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
- break;
- }
- }
-
- void atomCharacterClassBegin(bool invert = false)
- {
- m_invertCharacterClass = invert;
- }
-
- void atomCharacterClassAtom(UChar ch)
- {
- m_characterClassConstructor.putChar(ch);
- }
-
- void atomCharacterClassRange(UChar begin, UChar end)
- {
- m_characterClassConstructor.putRange(begin, end);
- }
-
- void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
- {
- ASSERT(classID != NewlineClassID);
-
- switch (classID) {
- case DigitClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
- break;
-
- case SpaceClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
- break;
-
- case WordClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
- break;
-
- default:
- RELEASE_ASSERT_NOT_REACHED();
- }
- }
-
- void atomCharacterClassEnd()
- {
- OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
- m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass));
- m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
- }
-
- void atomParenthesesSubpatternBegin(bool capture = true)
- {
- unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
- if (capture)
- m_pattern.m_numSubpatterns++;
-
- OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
- m_alternative = parenthesesDisjunction->addNewAlternative();
- m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
- }
-
- void atomParentheticalAssertionBegin(bool invert = false)
- {
- OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
- m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert));
- m_alternative = parenthesesDisjunction->addNewAlternative();
- m_invertParentheticalAssertion = invert;
- m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
- }
-
- void atomParenthesesEnd()
- {
- ASSERT(m_alternative->m_parent);
- ASSERT(m_alternative->m_parent->m_parent);
-
- PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
- m_alternative = m_alternative->m_parent->m_parent;
-
- PatternTerm& lastTerm = m_alternative->lastTerm();
-
- unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
- unsigned numBOLAnchoredAlts = 0;
-
- for (unsigned i = 0; i < numParenAlternatives; i++) {
- // Bubble up BOL flags
- if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
- numBOLAnchoredAlts++;
- }
-
- if (numBOLAnchoredAlts) {
- m_alternative->m_containsBOL = true;
- // If all the alternatives in parens start with BOL, then so does this one
- if (numBOLAnchoredAlts == numParenAlternatives)
- m_alternative->m_startsWithBOL = true;
- }
-
- lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
- m_invertParentheticalAssertion = false;
- }
-
- void atomBackReference(unsigned subpatternId)
- {
- ASSERT(subpatternId);
- m_pattern.m_containsBackreferences = true;
- m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
-
- if (subpatternId > m_pattern.m_numSubpatterns) {
- m_alternative->m_terms.append(PatternTerm::ForwardReference());
- return;
- }
-
- PatternAlternative* currentAlternative = m_alternative;
- ASSERT(currentAlternative);
-
- // Note to self: if we waited until the AST was baked, we could also remove forwards refs
- while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
- PatternTerm& term = currentAlternative->lastTerm();
- ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
-
- if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
- m_alternative->m_terms.append(PatternTerm::ForwardReference());
- return;
- }
- }
-
- m_alternative->m_terms.append(PatternTerm(subpatternId));
- }
-
- // deep copy the argument disjunction. If filterStartsWithBOL is true,
- // skip alternatives with m_startsWithBOL set true.
- PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
- {
- OwnPtr<PatternDisjunction> newDisjunction;
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
- if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
- if (!newDisjunction) {
- newDisjunction = adoptPtr(new PatternDisjunction());
- newDisjunction->m_parent = disjunction->m_parent;
- }
- PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
- newAlternative->m_terms.reserveInitialCapacity(alternative->m_terms.size());
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
- newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
- }
- }
-
- if (!newDisjunction)
- return 0;
-
- PatternDisjunction* copiedDisjunction = newDisjunction.get();
- m_pattern.m_disjunctions.append(newDisjunction.release());
- return copiedDisjunction;
- }
-
- PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
- {
- if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
- return PatternTerm(term);
-
- PatternTerm termCopy = term;
- termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
- return termCopy;
- }
-
- void quantifyAtom(unsigned min, unsigned max, bool greedy)
- {
- ASSERT(min <= max);
- ASSERT(m_alternative->m_terms.size());
-
- if (!max) {
- m_alternative->removeLastTerm();
- return;
- }
-
- PatternTerm& term = m_alternative->lastTerm();
- ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
- ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
-
- if (term.type == PatternTerm::TypeParentheticalAssertion) {
- // If an assertion is quantified with a minimum count of zero, it can simply be removed.
- // This arises from the RepeatMatcher behaviour in the spec. Matching an assertion never
- // results in any input being consumed, however the continuation passed to the assertion
- // (called in steps, 8c and 9 of the RepeatMatcher definition, ES5.1 15.10.2.5) will
- // reject all zero length matches (see step 2.1). A match from the continuation of the
- // expression will still be accepted regardless (via steps 8a and 11) - the upshot of all
- // this is that matches from the assertion are not required, and won't be accepted anyway,
- // so no need to ever run it.
- if (!min)
- m_alternative->removeLastTerm();
- // We never need to run an assertion more than once. Subsequent interations will be run
- // with the same start index (since assertions are non-capturing) and the same captures
- // (per step 4 of RepeatMatcher in ES5.1 15.10.2.5), and as such will always produce the
- // same result and captures. If the first match succeeds then the subsequent (min - 1)
- // matches will too. Any additional optional matches will fail (on the same basis as the
- // minimum zero quantified assertions, above), but this will still result in a match.
- return;
- }
-
- if (min == 0)
- term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
- else if (min == max)
- term.quantify(min, QuantifierFixedCount);
- else {
- term.quantify(min, QuantifierFixedCount);
- m_alternative->m_terms.append(copyTerm(term));
- // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
- m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
- if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
- m_alternative->lastTerm().parentheses.isCopy = true;
- }
- }
-
- void disjunction()
- {
- m_alternative = m_alternative->m_parent->addNewAlternative();
- }
-
- unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
- {
- alternative->m_hasFixedSize = true;
- Checked<unsigned> currentInputPosition = initialInputPosition;
-
- for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
- PatternTerm& term = alternative->m_terms[i];
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- case PatternTerm::TypeAssertionEOL:
- case PatternTerm::TypeAssertionWordBoundary:
- term.inputPosition = currentInputPosition.unsafeGet();
- break;
-
- case PatternTerm::TypeBackReference:
- term.inputPosition = currentInputPosition.unsafeGet();
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference;
- alternative->m_hasFixedSize = false;
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypePatternCharacter:
- term.inputPosition = currentInputPosition.unsafeGet();
- if (term.quantityType != QuantifierFixedCount) {
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
- alternative->m_hasFixedSize = false;
- } else
- currentInputPosition += term.quantityCount;
- break;
-
- case PatternTerm::TypeCharacterClass:
- term.inputPosition = currentInputPosition.unsafeGet();
- if (term.quantityType != QuantifierFixedCount) {
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
- alternative->m_hasFixedSize = false;
- } else
- currentInputPosition += term.quantityCount;
- break;
-
- case PatternTerm::TypeParenthesesSubpattern:
- // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
- term.frameLocation = currentCallFrameSize;
- if (term.quantityCount == 1 && !term.parentheses.isCopy) {
- if (term.quantityType != QuantifierFixedCount)
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet());
- // If quantity is fixed, then pre-check its minimum size.
- if (term.quantityType == QuantifierFixedCount)
- currentInputPosition += term.parentheses.disjunction->m_minimumSize;
- term.inputPosition = currentInputPosition.unsafeGet();
- } else if (term.parentheses.isTerminal) {
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet());
- term.inputPosition = currentInputPosition.unsafeGet();
- } else {
- term.inputPosition = currentInputPosition.unsafeGet();
- setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition.unsafeGet());
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
- }
- // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
- alternative->m_hasFixedSize = false;
- break;
-
- case PatternTerm::TypeParentheticalAssertion:
- term.inputPosition = currentInputPosition.unsafeGet();
- term.frameLocation = currentCallFrameSize;
- currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition.unsafeGet());
- break;
-
- case PatternTerm::TypeDotStarEnclosure:
- alternative->m_hasFixedSize = false;
- term.inputPosition = initialInputPosition;
- break;
- }
- }
-
- alternative->m_minimumSize = (currentInputPosition - initialInputPosition).unsafeGet();
- return currentCallFrameSize;
- }
-
- unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
- {
- if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
- initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative;
-
- unsigned minimumInputSize = UINT_MAX;
- unsigned maximumCallFrameSize = 0;
- bool hasFixedSize = true;
-
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
- unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
- minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
- maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
- hasFixedSize &= alternative->m_hasFixedSize;
- }
-
- ASSERT(minimumInputSize != UINT_MAX);
- ASSERT(maximumCallFrameSize >= initialCallFrameSize);
-
- disjunction->m_hasFixedSize = hasFixedSize;
- disjunction->m_minimumSize = minimumInputSize;
- disjunction->m_callFrameSize = maximumCallFrameSize;
- return maximumCallFrameSize;
- }
-
- void setupOffsets()
- {
- setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
- }
-
- // This optimization identifies sets of parentheses that we will never need to backtrack.
- // In these cases we do not need to store state from prior iterations.
- // We can presently avoid backtracking for:
- // * where the parens are at the end of the regular expression (last term in any of the
- // alternatives of the main body disjunction).
- // * where the parens are non-capturing, and quantified unbounded greedy (*).
- // * where the parens do not contain any capturing subpatterns.
- void checkForTerminalParentheses()
- {
- // This check is much too crude; should be just checking whether the candidate
- // node contains nested capturing subpatterns, not the whole expression!
- if (m_pattern.m_numSubpatterns)
- return;
-
- Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
- for (size_t i = 0; i < alternatives.size(); ++i) {
- Vector<PatternTerm>& terms = alternatives[i]->m_terms;
- if (terms.size()) {
- PatternTerm& term = terms.last();
- if (term.type == PatternTerm::TypeParenthesesSubpattern
- && term.quantityType == QuantifierGreedy
- && term.quantityCount == quantifyInfinite
- && !term.capture())
- term.parentheses.isTerminal = true;
- }
- }
- }
-
- void optimizeBOL()
- {
- // Look for expressions containing beginning of line (^) anchoring and unroll them.
- // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
- // This code relies on the parsing code tagging alternatives with m_containsBOL and
- // m_startsWithBOL and rolling those up to containing alternatives.
- // At this point, this is only valid for non-multiline expressions.
- PatternDisjunction* disjunction = m_pattern.m_body;
-
- if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
- return;
-
- PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
-
- // Set alternatives in disjunction to "onceThrough"
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
- disjunction->m_alternatives[alt]->setOnceThrough();
-
- if (loopDisjunction) {
- // Move alternatives from loopDisjunction to disjunction
- for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
- disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt].release());
-
- loopDisjunction->m_alternatives.clear();
- }
- }
-
- bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
- {
- Vector<PatternTerm>& terms = alternative->m_terms;
-
- for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
- PatternTerm& term = terms[termIndex];
-
- if (term.m_capture)
- return true;
-
- if (term.type == PatternTerm::TypeParenthesesSubpattern) {
- PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
- for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
- if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
- return true;
- }
- }
- }
-
- return false;
- }
-
- // This optimization identifies alternatives in the form of
- // [^].*[?]<expression>.*[$] for expressions that don't have any
- // capturing terms. The alternative is changed to <expression>
- // followed by processing of the dot stars to find and adjust the
- // beginning and the end of the match.
- void optimizeDotStarWrappedExpressions()
- {
- Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
- if (alternatives.size() != 1)
- return;
-
- PatternAlternative* alternative = alternatives[0].get();
- Vector<PatternTerm>& terms = alternative->m_terms;
- if (terms.size() >= 3) {
- bool startsWithBOL = false;
- bool endsWithEOL = false;
- size_t termIndex, firstExpressionTerm, lastExpressionTerm;
-
- termIndex = 0;
- if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
- startsWithBOL = true;
- ++termIndex;
- }
-
- PatternTerm& firstNonAnchorTerm = terms[termIndex];
- if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
- return;
-
- firstExpressionTerm = termIndex + 1;
-
- termIndex = terms.size() - 1;
- if (terms[termIndex].type == PatternTerm::TypeAssertionEOL) {
- endsWithEOL = true;
- --termIndex;
- }
-
- PatternTerm& lastNonAnchorTerm = terms[termIndex];
- if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
- return;
-
- lastExpressionTerm = termIndex - 1;
-
- if (firstExpressionTerm > lastExpressionTerm)
- return;
-
- if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
- for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
- terms.remove(termIndex);
-
- for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
- terms.remove(termIndex - 1);
-
- terms.append(PatternTerm(startsWithBOL, endsWithEOL));
-
- m_pattern.m_containsBOL = false;
- }
- }
- }
-
-private:
- YarrPattern& m_pattern;
- PatternAlternative* m_alternative;
- CharacterClassConstructor m_characterClassConstructor;
- bool m_invertCharacterClass;
- bool m_invertParentheticalAssertion;
-};
-
-const char* YarrPattern::compile(const String& patternString)
-{
- YarrPatternConstructor constructor(*this);
-
- if (const char* error = parse(constructor, patternString))
- return error;
-
- // If the pattern contains illegal backreferences reset & reparse.
- // Quoting Netscape's "What's new in JavaScript 1.2",
- // "Note: if the number of left parentheses is less than the number specified
- // in \#, the \# is taken as an octal escape as described in the next row."
- if (containsIllegalBackReference()) {
- unsigned numSubpatterns = m_numSubpatterns;
-
- constructor.reset();
-#if !ASSERT_DISABLED
- const char* error =
-#endif
- parse(constructor, patternString, numSubpatterns);
-
- ASSERT(!error);
- ASSERT(numSubpatterns == m_numSubpatterns);
- }
-
- constructor.checkForTerminalParentheses();
- constructor.optimizeDotStarWrappedExpressions();
- constructor.optimizeBOL();
-
- constructor.setupOffsets();
-
- return 0;
-}
-
-YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)
- : m_ignoreCase(ignoreCase)
- , m_multiline(multiline)
- , m_containsBackreferences(false)
- , m_containsBOL(false)
- , m_numSubpatterns(0)
- , m_maxBackReference(0)
- , newlineCached(0)
- , digitsCached(0)
- , spacesCached(0)
- , wordcharCached(0)
- , nondigitsCached(0)
- , nonspacesCached(0)
- , nonwordcharCached(0)
-{
- *error = compile(pattern);
-}
-
-} }
diff --git a/third_party/WebKit/Source/yarr/YarrPattern.h b/third_party/WebKit/Source/yarr/YarrPattern.h
deleted file mode 100644
index 4682c0b..0000000
--- a/third_party/WebKit/Source/yarr/YarrPattern.h
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef YarrPattern_h
-#define YarrPattern_h
-
-#include <wtf/CheckedArithmetic.h>
-#include <wtf/OwnPtr.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/RefCounted.h>
-#include <wtf/Vector.h>
-#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace JSC { namespace Yarr {
-
-struct PatternDisjunction;
-
-struct CharacterRange {
- UChar begin;
- UChar end;
-
- CharacterRange(UChar begin, UChar end)
- : begin(begin)
- , end(end)
- {
- }
-};
-
-struct CharacterClassTable : RefCounted<CharacterClassTable> {
- const char* m_table;
- bool m_inverted;
- static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
- {
- return adoptRef(new CharacterClassTable(table, inverted));
- }
-
-private:
- CharacterClassTable(const char* table, bool inverted)
- : m_table(table)
- , m_inverted(inverted)
- {
- }
-};
-
-struct CharacterClass {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- // All CharacterClass instances have to have the full set of matches and ranges,
- // they may have an optional table for faster lookups (which must match the
- // specified matches and ranges)
- CharacterClass(PassRefPtr<CharacterClassTable> table)
- : m_table(table)
- {
- }
- Vector<UChar> m_matches;
- Vector<CharacterRange> m_ranges;
- Vector<UChar> m_matchesUnicode;
- Vector<CharacterRange> m_rangesUnicode;
- RefPtr<CharacterClassTable> m_table;
-};
-
-enum QuantifierType {
- QuantifierFixedCount,
- QuantifierGreedy,
- QuantifierNonGreedy,
-};
-
-struct PatternTerm {
- enum Type {
- TypeAssertionBOL,
- TypeAssertionEOL,
- TypeAssertionWordBoundary,
- TypePatternCharacter,
- TypeCharacterClass,
- TypeBackReference,
- TypeForwardReference,
- TypeParenthesesSubpattern,
- TypeParentheticalAssertion,
- TypeDotStarEnclosure,
- } type;
- bool m_capture :1;
- bool m_invert :1;
- union {
- UChar patternCharacter;
- CharacterClass* characterClass;
- unsigned backReferenceSubpatternId;
- struct {
- PatternDisjunction* disjunction;
- unsigned subpatternId;
- unsigned lastSubpatternId;
- bool isCopy;
- bool isTerminal;
- } parentheses;
- struct {
- bool bolAnchor : 1;
- bool eolAnchor : 1;
- } anchors;
- };
- QuantifierType quantityType;
- Checked<unsigned> quantityCount;
- int inputPosition;
- unsigned frameLocation;
-
- PatternTerm(UChar ch)
- : type(PatternTerm::TypePatternCharacter)
- , m_capture(false)
- , m_invert(false)
- {
- patternCharacter = ch;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(CharacterClass* charClass, bool invert)
- : type(PatternTerm::TypeCharacterClass)
- , m_capture(false)
- , m_invert(invert)
- {
- characterClass = charClass;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
- : type(type)
- , m_capture(capture)
- , m_invert(invert)
- {
- parentheses.disjunction = disjunction;
- parentheses.subpatternId = subpatternId;
- parentheses.isCopy = false;
- parentheses.isTerminal = false;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(Type type, bool invert = false)
- : type(type)
- , m_capture(false)
- , m_invert(invert)
- {
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(unsigned spatternId)
- : type(TypeBackReference)
- , m_capture(false)
- , m_invert(false)
- {
- backReferenceSubpatternId = spatternId;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- PatternTerm(bool bolAnchor, bool eolAnchor)
- : type(TypeDotStarEnclosure)
- , m_capture(false)
- , m_invert(false)
- {
- anchors.bolAnchor = bolAnchor;
- anchors.eolAnchor = eolAnchor;
- quantityType = QuantifierFixedCount;
- quantityCount = 1;
- }
-
- static PatternTerm ForwardReference()
- {
- return PatternTerm(TypeForwardReference);
- }
-
- static PatternTerm BOL()
- {
- return PatternTerm(TypeAssertionBOL);
- }
-
- static PatternTerm EOL()
- {
- return PatternTerm(TypeAssertionEOL);
- }
-
- static PatternTerm WordBoundary(bool invert)
- {
- return PatternTerm(TypeAssertionWordBoundary, invert);
- }
-
- bool invert()
- {
- return m_invert;
- }
-
- bool capture()
- {
- return m_capture;
- }
-
- void quantify(unsigned count, QuantifierType type)
- {
- quantityCount = count;
- quantityType = type;
- }
-};
-
-struct PatternAlternative {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- PatternAlternative(PatternDisjunction* disjunction)
- : m_parent(disjunction)
- , m_onceThrough(false)
- , m_hasFixedSize(false)
- , m_startsWithBOL(false)
- , m_containsBOL(false)
- {
- }
-
- PatternTerm& lastTerm()
- {
- ASSERT(m_terms.size());
- return m_terms[m_terms.size() - 1];
- }
-
- void removeLastTerm()
- {
- ASSERT(m_terms.size());
- m_terms.shrink(m_terms.size() - 1);
- }
-
- void setOnceThrough()
- {
- m_onceThrough = true;
- }
-
- bool onceThrough()
- {
- return m_onceThrough;
- }
-
- Vector<PatternTerm> m_terms;
- PatternDisjunction* m_parent;
- unsigned m_minimumSize;
- bool m_onceThrough : 1;
- bool m_hasFixedSize : 1;
- bool m_startsWithBOL : 1;
- bool m_containsBOL : 1;
-};
-
-struct PatternDisjunction {
- WTF_MAKE_FAST_ALLOCATED;
-public:
- PatternDisjunction(PatternAlternative* parent = 0)
- : m_parent(parent)
- , m_hasFixedSize(false)
- {
- }
-
- PatternAlternative* addNewAlternative()
- {
- PatternAlternative* alternative = new PatternAlternative(this);
- m_alternatives.append(adoptPtr(alternative));
- return alternative;
- }
-
- Vector<OwnPtr<PatternAlternative> > m_alternatives;
- PatternAlternative* m_parent;
- unsigned m_minimumSize;
- unsigned m_callFrameSize;
- bool m_hasFixedSize;
-};
-
-// You probably don't want to be calling these functions directly
-// (please to be calling newlineCharacterClass() et al on your
-// friendly neighborhood YarrPattern instance to get nicely
-// cached copies).
-CharacterClass* newlineCreate();
-CharacterClass* digitsCreate();
-CharacterClass* spacesCreate();
-CharacterClass* wordcharCreate();
-CharacterClass* nondigitsCreate();
-CharacterClass* nonspacesCreate();
-CharacterClass* nonwordcharCreate();
-
-struct TermChain {
- TermChain(PatternTerm term)
- : term(term)
- {}
-
- PatternTerm term;
- Vector<TermChain> hotTerms;
-};
-
-struct YarrPattern {
- YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
-
- void reset()
- {
- m_numSubpatterns = 0;
- m_maxBackReference = 0;
-
- m_containsBackreferences = false;
- m_containsBOL = false;
-
- newlineCached = 0;
- digitsCached = 0;
- spacesCached = 0;
- wordcharCached = 0;
- nondigitsCached = 0;
- nonspacesCached = 0;
- nonwordcharCached = 0;
-
- m_disjunctions.clear();
- m_userCharacterClasses.clear();
- }
-
- bool containsIllegalBackReference()
- {
- return m_maxBackReference > m_numSubpatterns;
- }
-
- CharacterClass* newlineCharacterClass()
- {
- if (!newlineCached)
- m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
- return newlineCached;
- }
- CharacterClass* digitsCharacterClass()
- {
- if (!digitsCached)
- m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
- return digitsCached;
- }
- CharacterClass* spacesCharacterClass()
- {
- if (!spacesCached)
- m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
- return spacesCached;
- }
- CharacterClass* wordcharCharacterClass()
- {
- if (!wordcharCached)
- m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
- return wordcharCached;
- }
- CharacterClass* nondigitsCharacterClass()
- {
- if (!nondigitsCached)
- m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
- return nondigitsCached;
- }
- CharacterClass* nonspacesCharacterClass()
- {
- if (!nonspacesCached)
- m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
- return nonspacesCached;
- }
- CharacterClass* nonwordcharCharacterClass()
- {
- if (!nonwordcharCached)
- m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
- return nonwordcharCached;
- }
-
- bool m_ignoreCase : 1;
- bool m_multiline : 1;
- bool m_containsBackreferences : 1;
- bool m_containsBOL : 1;
- unsigned m_numSubpatterns;
- unsigned m_maxBackReference;
- PatternDisjunction* m_body;
- Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
- Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
-
-private:
- const char* compile(const String& patternString);
-
- CharacterClass* newlineCached;
- CharacterClass* digitsCached;
- CharacterClass* spacesCached;
- CharacterClass* wordcharCached;
- CharacterClass* nondigitsCached;
- CharacterClass* nonspacesCached;
- CharacterClass* nonwordcharCached;
-};
-
-} } // namespace JSC::Yarr
-
-#endif // YarrPattern_h
diff --git a/third_party/WebKit/Source/yarr/YarrSyntaxChecker.cpp b/third_party/WebKit/Source/yarr/YarrSyntaxChecker.cpp
deleted file mode 100644
index b1f922f..0000000
--- a/third_party/WebKit/Source/yarr/YarrSyntaxChecker.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2011 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "yarr/YarrSyntaxChecker.h"
-
-#include "yarr/YarrParser.h"
-
-namespace JSC { namespace Yarr {
-
-class SyntaxChecker {
-public:
- void assertionBOL() {}
- void assertionEOL() {}
- void assertionWordBoundary(bool) {}
- void atomPatternCharacter(UChar) {}
- void atomBuiltInCharacterClass(BuiltInCharacterClassID, bool) {}
- void atomCharacterClassBegin(bool = false) {}
- void atomCharacterClassAtom(UChar) {}
- void atomCharacterClassRange(UChar, UChar) {}
- void atomCharacterClassBuiltIn(BuiltInCharacterClassID, bool) {}
- void atomCharacterClassEnd() {}
- void atomParenthesesSubpatternBegin(bool = true) {}
- void atomParentheticalAssertionBegin(bool = false) {}
- void atomParenthesesEnd() {}
- void atomBackReference(unsigned) {}
- void quantifyAtom(unsigned, unsigned, bool) {}
- void disjunction() {}
-};
-
-const char* checkSyntax(const String& pattern)
-{
- SyntaxChecker syntaxChecker;
- return parse(syntaxChecker, pattern);
-}
-
-}} // JSC::YARR
diff --git a/third_party/WebKit/Source/yarr/YarrSyntaxChecker.h b/third_party/WebKit/Source/yarr/YarrSyntaxChecker.h
deleted file mode 100644
index 104ced3..0000000
--- a/third_party/WebKit/Source/yarr/YarrSyntaxChecker.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2011 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef YarrSyntaxChecker_h
-#define YarrSyntaxChecker_h
-
-#include <wtf/text/WTFString.h>
-
-namespace JSC { namespace Yarr {
-
-const char* checkSyntax(const String& pattern);
-
-}} // JSC::YARR
-
-#endif // YarrSyntaxChecker_h
-
diff --git a/third_party/WebKit/Source/yarr/create_regex_tables b/third_party/WebKit/Source/yarr/create_regex_tables
deleted file mode 100644
index bd799ba..0000000
--- a/third_party/WebKit/Source/yarr/create_regex_tables
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (C) 2010 Apple Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import sys
-
-types = {
- "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
- "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
- "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
- "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
- "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]},
- "digits": { "UseTable" : False, "data": [('0', '9')]},
- "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
-}
-entriesPerLine = 50
-arrays = "";
-functions = "";
-emitTables = (len(sys.argv) < 2 or sys.argv[1] != "--no-tables")
-
-for name, classes in types.items():
- ranges = [];
- size = 0;
- for _class in classes["data"]:
- if type(_class) == str:
- ranges.append((ord(_class), ord(_class)))
- elif type(_class) == int:
- ranges.append((_class, _class))
- else:
- (min, max) = _class;
- if type(min) == str:
- min = ord(min)
- if type(max) == str:
- max = ord(max)
- if max > 0x7f and min <= 0x7f:
- ranges.append((min, 0x7f))
- min = 0x80
- ranges.append((min,max))
- ranges.sort();
-
- if emitTables and classes["UseTable"] and (not "Inverse" in classes):
- array = ("static const char _%sData[65536] = {\n" % name);
- i = 0
- for (min,max) in ranges:
- while i < min:
- i = i + 1
- array += ('0,')
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- while i <= max:
- i = i + 1
- if (i == 65536):
- array += ("1")
- else:
- array += ('1,')
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- while i < 0xffff:
- array += ("0,")
- i = i + 1;
- if (i % entriesPerLine == 0) and (i != 0):
- array += ('\n')
- if i == 0xffff:
- array += ("0")
- array += ("\n};\n\n");
- arrays += array
-
- # Generate createFunction:
- function = "";
- function += ("CharacterClass* %sCreate()\n" % name)
- function += ("{\n")
- if emitTables and classes["UseTable"]:
- if "Inverse" in classes:
- function += (" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
- else:
- function += (" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
- else:
- function += (" CharacterClass* characterClass = new CharacterClass(0);\n")
- for (min, max) in ranges:
- if (min == max):
- if (min > 127):
- function += (" characterClass->m_matchesUnicode.append(0x%04x);\n" % min)
- else:
- function += (" characterClass->m_matches.append(0x%02x);\n" % min)
- continue
- if (min > 127) or (max > 127):
- function += (" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max))
- else:
- function += (" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max))
- function += (" return characterClass;\n")
- function += ("}\n\n")
- functions += function
-
-if (len(sys.argv) > 1):
- f = open(sys.argv[-1], "w")
- f.write(arrays)
- f.write(functions)
- f.close()
-else:
- print(arrays)
- print(functions)
-
diff --git a/third_party/WebKit/Source/yarr/yarr.gyp b/third_party/WebKit/Source/yarr/yarr.gyp
deleted file mode 100644
index 8f82ed2..0000000
--- a/third_party/WebKit/Source/yarr/yarr.gyp
+++ /dev/null
@@ -1,100 +0,0 @@
-#
-# Copyright (C) 2013 Google Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-
-{
- 'includes': [
- '../WebKit/chromium/WinPrecompile.gypi',
- '../core/features.gypi',
- ],
- 'conditions': [
- ['os_posix == 1 and OS != "mac" and gcc_version>=46', {
- 'target_defaults': {
- # Disable warnings about c++0x compatibility, as some names (such as nullptr) conflict
- # with upcoming c++0x types.
- 'cflags_cc': ['-Wno-c++0x-compat'],
- },
- }],
- ],
- 'targets': [
- {
- 'target_name': 'yarr',
- 'type': 'static_library',
- 'dependencies': [
- '../wtf/wtf.gyp:wtf',
- ],
- 'variables': { 'optimize': 'max' },
- 'actions': [
- {
- 'action_name': 'generate_regex_tables',
- 'inputs': [
- 'create_regex_tables',
- ],
- 'arguments': [
- '--no-tables',
- ],
- 'outputs': [
- '<(INTERMEDIATE_DIR)/RegExpJitTables.h',
- ],
- 'action': ['python', '<@(_inputs)', '<@(_arguments)', '<@(_outputs)'],
- },
- ],
- 'include_dirs': [
- '<(INTERMEDIATE_DIR)',
- '..',
- ],
- 'sources': [
- 'Yarr.h',
- 'YarrCanonicalizeUCS2.cpp',
- 'YarrCanonicalizeUCS2.h',
- 'YarrInterpreter.cpp',
- 'YarrInterpreter.h',
- 'YarrParser.h',
- 'YarrPattern.cpp',
- 'YarrPattern.h',
- 'YarrSyntaxChecker.cpp',
- 'YarrSyntaxChecker.h',
- ],
- 'direct_dependent_settings': {
- 'include_dirs': [
- '..',
- ],
- },
- 'export_dependent_settings': [
- '../wtf/wtf.gyp:wtf',
- ],
- 'conditions': [
- ['OS=="win"', {
- # Disable c4267 warnings until we fix size_t to int truncations.
- 'msvs_disabled_warnings': [4267, ],
- }],
- ],
- },
- ],
-}