From 5f65b1798a72eee082995a6a2feb62eaf4ff7c3c Mon Sep 17 00:00:00 2001 From: gorhill Date: Wed, 4 Mar 2015 19:36:09 -0500 Subject: this fixes #702 --- src/js/background.js | 6 +- src/js/static-net-filtering.js | 501 +++++++---------------------------------- 2 files changed, 81 insertions(+), 426 deletions(-) (limited to 'src/js') diff --git a/src/js/background.js b/src/js/background.js index 5beaff7..da7dad1 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -81,12 +81,14 @@ return { localSettings: { blockedRequestCount: 0, - allowedRequestCount: 0 + allowedRequestCount: 0, }, + localSettingsModifyTime: 0, + localSettingsSaveTime: 0, // read-only systemSettings: { - compiledMagic: 'shztbfhkfjit', + compiledMagic: 'wcuwrlodqyee', selfieMagic: 'spqmeuaftfra' }, diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index d77ef6e..ea2e260 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -165,10 +165,18 @@ var isFirstParty = function(firstPartyDomain, hostname) { return c === '.' || c === ''; }; -var strToRegex = function(prefix, s) { - var reStr = s.replace(/([.+?^=!:${}()|\[\]\/\\])/g, '\\$1') +// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions + +var strToRegex = function(s, anchor) { + var reStr = s.replace(/[.+?^${}()|[\]\\]/g, '\\$&') .replace(/\*/g, '.*'); - return new RegExp(prefix + reStr); + if ( anchor < 0 ) { + reStr = '^' + reStr; + } else if ( anchor > 0 ) { + reStr += reStr + '$'; + } + //console.debug('µBlock.staticNetFilteringEngine: created RegExp("%s")', reStr); + return new RegExp(reStr); }; /******************************************************************************* @@ -189,33 +197,13 @@ Filters family tree: - no hostname - specific hostname (not implemented) -- one wildcard - - anywhere - - no hostname - - specific hostname - - anchored at start - - no hostname - - specific hostname - - anchored at end - - no hostname - - specific hostname +- with wildcard(s) - anchored within hostname - - no hostname (not implemented) - - specific hostname (not implemented) - -- more than one wildcard - - anywhere - no hostname - specific hostname - - anchored at start + - all else - no hostname - specific hostname - - anchored at end - - no hostname - - specific hostname - - anchored within hostname - - no hostname (not implemented) - - specific hostname (not implemented) */ @@ -545,7 +533,7 @@ FilterPlainHnAnchored.prototype.match = function(url, tokenBeg) { reURLPostHostnameAnchors.test(url.slice(pos + 3, tokenBeg)) === false; }; -FilterPlainHnAnchored.fid = FilterPlainHnAnchored.prototype.fid = 'h|a'; +FilterPlainHnAnchored.fid = FilterPlainHnAnchored.prototype.fid = '||a'; FilterPlainHnAnchored.prototype.toString = function() { return '||' + this.s; @@ -567,310 +555,81 @@ FilterPlainHnAnchored.fromSelfie = function(s) { /******************************************************************************/ -// With a single wildcard, regex is not optimal. -// See: -// http://jsperf.com/regexp-vs-indexof-abp-miss/5 -// http://jsperf.com/regexp-vs-indexof-abp-hit/4 - -var FilterSingleWildcard = function(lSegment, rSegment, tokenBeg) { - this.tokenBeg = tokenBeg; - this.lSegment = lSegment; - this.rSegment = rSegment; -}; - -FilterSingleWildcard.prototype.match = function(url, tokenBeg) { - tokenBeg -= this.tokenBeg; - return url.substr(tokenBeg, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0; -}; - -FilterSingleWildcard.fid = FilterSingleWildcard.prototype.fid = '*'; - -FilterSingleWildcard.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment; -}; - -FilterSingleWildcard.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment + '\t' + - this.tokenBeg; -}; - -FilterSingleWildcard.compile = function(details) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1) + '\t' + - details.tokenBeg; -}; - -FilterSingleWildcard.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterSingleWildcard(args[0], args[1], atoi(args[2])); -}; - -/******************************************************************************/ - -var FilterSingleWildcardHostname = function(lSegment, rSegment, tokenBeg, hostname) { - this.tokenBeg = tokenBeg; - this.lSegment = lSegment; - this.rSegment = rSegment; - this.hostname = hostname; -}; - -FilterSingleWildcardHostname.prototype.match = function(url, tokenBeg) { - tokenBeg -= this.tokenBeg; - return pageHostnameRegister.slice(-this.hostname.length) === this.hostname && - url.substr(tokenBeg, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0; -}; - -FilterSingleWildcardHostname.fid = FilterSingleWildcardHostname.prototype.fid = '*h'; - -FilterSingleWildcardHostname.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname; -}; - -FilterSingleWildcardHostname.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment + '\t' + - this.tokenBeg + '\t' + - this.hostname; -}; - -FilterSingleWildcardHostname.compile = function(details, hostname) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1) + '\t' + - details.tokenBeg + '\t' + - hostname; -}; - -FilterSingleWildcardHostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterSingleWildcardHostname(args[0], args[1], atoi(args[2]), args[3]); -}; - -/******************************************************************************/ - -var FilterSingleWildcardPrefix0 = function(lSegment, rSegment) { - this.lSegment = lSegment; - this.rSegment = rSegment; -}; - -FilterSingleWildcardPrefix0.prototype.match = function(url, tokenBeg) { - return url.substr(tokenBeg, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0; -}; - -FilterSingleWildcardPrefix0.fid = FilterSingleWildcardPrefix0.prototype.fid = '0*'; - -FilterSingleWildcardPrefix0.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment; -}; - -FilterSingleWildcardPrefix0.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment; -}; - -FilterSingleWildcardPrefix0.compile = function(details) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + s.slice(pos + 1); -}; - -FilterSingleWildcardPrefix0.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterSingleWildcardPrefix0(s.slice(0, pos), s.slice(pos + 1)); -}; - -/******************************************************************************/ - -var FilterSingleWildcardPrefix0Hostname = function(lSegment, rSegment, hostname) { - this.lSegment = lSegment; - this.rSegment = rSegment; - this.hostname = hostname; -}; - -FilterSingleWildcardPrefix0Hostname.prototype.match = function(url, tokenBeg) { - return pageHostnameRegister.slice(-this.hostname.length) === this.hostname && - url.substr(tokenBeg, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, tokenBeg + this.lSegment.length) > 0; -}; - -FilterSingleWildcardPrefix0Hostname.fid = FilterSingleWildcardPrefix0Hostname.prototype.fid = '0*h'; - -FilterSingleWildcardPrefix0Hostname.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname; -}; - -FilterSingleWildcardPrefix0Hostname.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment + '\t' + - this.hostname; -}; - -FilterSingleWildcardPrefix0Hostname.compile = function(details, hostname) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1) + '\t' + - hostname; -}; - -FilterSingleWildcardPrefix0Hostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterSingleWildcardPrefix0Hostname(args[0], args[1], args[2]); -}; - -/******************************************************************************/ +// Generic filter -var FilterSingleWildcardLeftAnchored = function(lSegment, rSegment) { - this.lSegment = lSegment; - this.rSegment = rSegment; +var FilterGeneric = function(s, anchor) { + this.s = s; + this.anchor = anchor; + this.re = null; }; -FilterSingleWildcardLeftAnchored.prototype.match = function(url) { - return url.slice(0, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, this.lSegment.length) > 0; +FilterGeneric.prototype.match = function(url) { + if ( this.re === null ) { + this.re = strToRegex(this.s, this.anchor); + } + return this.re.test(url); }; -FilterSingleWildcardLeftAnchored.fid = FilterSingleWildcardLeftAnchored.prototype.fid = '|*'; +FilterGeneric.fid = FilterGeneric.prototype.fid = '_'; -FilterSingleWildcardLeftAnchored.prototype.toString = function() { - return '|' + this.lSegment + '*' + this.rSegment; +FilterGeneric.prototype.toString = function() { + if ( this.anchor === 0 ) { + return this.s; + } + if ( this.anchor < 0 ) { + return '|' + this.s; + } + return this.s + '|'; }; -FilterSingleWildcardLeftAnchored.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment; +FilterGeneric.prototype.toSelfie = function() { + return this.s + '\t' + this.anchor; }; -FilterSingleWildcardLeftAnchored.compile = function(details) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1); +FilterGeneric.compile = function(details) { + return details.f + '\t' + details.anchor; }; -FilterSingleWildcardLeftAnchored.fromSelfie = function(s) { +FilterGeneric.fromSelfie = function(s) { var pos = s.indexOf('\t'); - return new FilterSingleWildcardLeftAnchored(s.slice(0, pos), s.slice(pos + 1)); -}; - -/******************************************************************************/ - -var FilterSingleWildcardLeftAnchoredHostname = function(lSegment, rSegment, hostname) { - this.lSegment = lSegment; - this.rSegment = rSegment; - this.hostname = hostname; -}; - -FilterSingleWildcardLeftAnchoredHostname.prototype.match = function(url) { - return pageHostnameRegister.slice(-this.hostname.length) === this.hostname && - url.slice(0, this.lSegment.length) === this.lSegment && - url.indexOf(this.rSegment, this.lSegment.length) > 0; -}; - -FilterSingleWildcardLeftAnchoredHostname.fid = FilterSingleWildcardLeftAnchoredHostname.prototype.fid = '|*h'; - -FilterSingleWildcardLeftAnchoredHostname.prototype.toString = function() { - return '|' + this.lSegment + '*' + this.rSegment + '$domain=' + this.hostname; -}; - -FilterSingleWildcardLeftAnchoredHostname.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment + '\t' + - this.hostname; -}; - -FilterSingleWildcardLeftAnchoredHostname.compile = function(details, hostname) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1) + '\t' + - hostname; -}; - -FilterSingleWildcardLeftAnchoredHostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterSingleWildcardLeftAnchoredHostname(args[0], args[1], args[2]); + return new FilterGeneric(s.slice(0, pos), parseInt(s.slice(pos + 1), 10)); }; /******************************************************************************/ -var FilterSingleWildcardRightAnchored = function(lSegment, rSegment) { - this.lSegment = lSegment; - this.rSegment = rSegment; -}; - -FilterSingleWildcardRightAnchored.prototype.match = function(url) { - return url.slice(-this.rSegment.length) === this.rSegment && - url.lastIndexOf(this.lSegment, url.length - this.rSegment.length - this.lSegment.length) >= 0; -}; - -FilterSingleWildcardRightAnchored.fid = FilterSingleWildcardRightAnchored.prototype.fid = '*|'; - -FilterSingleWildcardRightAnchored.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment + '|'; -}; - -FilterSingleWildcardRightAnchored.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment; -}; - -FilterSingleWildcardRightAnchored.compile = function(details) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1); -}; - -FilterSingleWildcardRightAnchored.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterSingleWildcardRightAnchored(s.slice(0, pos), s.slice(pos + 1)); -}; - -/******************************************************************************/ +// Generic filter -var FilterSingleWildcardRightAnchoredHostname = function(lSegment, rSegment, hostname) { - this.lSegment = lSegment; - this.rSegment = rSegment; +var FilterGenericHostname = function(s, anchor, hostname) { + FilterGeneric.call(this, s, anchor); this.hostname = hostname; }; +FilterGenericHostname.prototype = Object.create(FilterGeneric.prototype); +FilterGenericHostname.prototype.constructor = FilterGenericHostname; -FilterSingleWildcardRightAnchoredHostname.prototype.match = function(url) { - return pageHostnameRegister.slice(-this.hostname.length) === this.hostname && - url.slice(-this.rSegment.length) === this.rSegment && - url.lastIndexOf(this.lSegment, url.length - this.rSegment.length - this.lSegment.length) >= 0; +FilterGenericHostname.prototype.match = function(url) { + if ( pageHostnameRegister.slice(-this.hostname.length) !== this.hostname ) { + return false; + } + return FilterGeneric.prototype.match.call(this, url); }; -FilterSingleWildcardRightAnchoredHostname.fid = FilterSingleWildcardRightAnchoredHostname.prototype.fid = '*|h'; +FilterGenericHostname.fid = FilterGenericHostname.prototype.fid = '_h'; -FilterSingleWildcardRightAnchoredHostname.prototype.toString = function() { - return this.lSegment + '*' + this.rSegment + '|$domain=' + this.hostname; +FilterGenericHostname.prototype.toString = function() { + return FilterGeneric.prototype.toString.call(this) + '$domain=' + this.hostname; }; -FilterSingleWildcardRightAnchoredHostname.prototype.toSelfie = function() { - return this.lSegment + '\t' + - this.rSegment + '\t' + - this.hostname; +FilterGenericHostname.prototype.toSelfie = function() { + return FilterGeneric.prototype.toSelfie.call(this) + '\t' + this.hostname; }; -FilterSingleWildcardRightAnchoredHostname.compile = function(details, hostname) { - var s = details.f; - var pos = s.indexOf('*'); - return s.slice(0, pos) + '\t' + - s.slice(pos + 1) + '\t' + - hostname; +FilterGenericHostname.compile = function(details, hostname) { + return FilterGeneric.compile(details) + '\t' + hostname; }; -FilterSingleWildcardRightAnchoredHostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterSingleWildcardRightAnchoredHostname(args[0], args[1], args[2]); +FilterGenericHostname.fromSelfie = function(s) { + var fields = s.split('\t'); + return new FilterGenericHostname(fields[0], parseInt(fields[1], 10), fields[2]); }; /******************************************************************************/ @@ -886,7 +645,7 @@ var FilterGenericHnAnchored = function(s) { FilterGenericHnAnchored.prototype.match = function(url) { if ( this.re === null ) { - this.re = strToRegex('', this.s); + this.re = strToRegex(this.s, 0); } // Quick test first if ( this.re.test(url) === false ) { @@ -925,6 +684,7 @@ var FilterGenericHnAnchoredHostname = function(s, hostname) { this.hostname = hostname; }; FilterGenericHnAnchoredHostname.prototype = Object.create(FilterGenericHnAnchored.prototype); +FilterGenericHnAnchoredHostname.prototype.constructor = FilterGenericHnAnchoredHostname; FilterGenericHnAnchoredHostname.prototype.match = function(url) { if ( pageHostnameRegister.slice(-this.hostname.length) !== this.hostname ) { @@ -954,88 +714,6 @@ FilterGenericHnAnchoredHostname.fromSelfie = function(s) { /******************************************************************************/ -// With many wildcards, a regex is best. - -// Ref: regex escaper taken from: -// https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions -// modified for the purpose here. - -var FilterManyWildcards = function(s, tokenBeg) { - this.s = s; - this.tokenBeg = tokenBeg; - this.re = null; -}; - -FilterManyWildcards.prototype.match = function(url, tokenBeg) { - if ( this.re === null ) { - this.re = strToRegex('^', this.s); - } - return this.re.test(url.slice(tokenBeg - this.tokenBeg)); -}; - -FilterManyWildcards.fid = FilterManyWildcards.prototype.fid = '*+'; - -FilterManyWildcards.prototype.toString = function() { - return this.s; -}; - -FilterManyWildcards.prototype.toSelfie = function() { - return this.s + '\t' + this.tokenBeg; -}; - -FilterManyWildcards.compile = function(details) { - return details.f + '\t' + details.tokenBeg; -}; - -FilterManyWildcards.fromSelfie = function(s) { - var pos = s.indexOf('\t'); - return new FilterManyWildcards(s.slice(0, pos), atoi(s.slice(pos + 1))); -}; - -/******************************************************************************/ - -var FilterManyWildcardsHostname = function(s, tokenBeg, hostname) { - this.s = s; - this.tokenBeg = tokenBeg; - this.re = null; - this.hostname = hostname; -}; - -FilterManyWildcardsHostname.prototype.match = function(url, tokenBeg) { - if ( pageHostnameRegister.slice(-this.hostname.length) !== this.hostname ) { - return false; - } - if ( this.re === null ) { - this.re = strToRegex('^', this.s); - } - return this.re.test(url.slice(tokenBeg - this.tokenBeg)); -}; - -FilterManyWildcardsHostname.fid = FilterManyWildcardsHostname.prototype.fid = '*+h'; - -FilterManyWildcardsHostname.prototype.toString = function() { - return this.s + '$domain=' + this.hostname; -}; - -FilterManyWildcardsHostname.prototype.toSelfie = function() { - return this.s + '\t' + - this.tokenBeg + '\t' + - this.hostname; -}; - -FilterManyWildcardsHostname.compile = function(details, hostname) { - return details.f + '\t' + - details.tokenBeg + '\t' + - hostname; -}; - -FilterManyWildcardsHostname.fromSelfie = function(s) { - var args = s.split('\t'); - return new FilterManyWildcardsHostname(args[0], atoi(args[1]), args[2]); -}; - -/******************************************************************************/ - // Regex-based filters var FilterRegex = function(s) { @@ -1404,24 +1082,11 @@ var getFilterClass = function(details) { return FilterRegex; } var s = details.f; - var wcOffset = s.indexOf('*'); - if ( wcOffset !== -1 ) { + if ( s.indexOf('*') !== -1 ) { if ( details.hostnameAnchored ) { return FilterGenericHnAnchored; } - if ( s.indexOf('*', wcOffset + 1) !== -1 ) { - return details.anchor === 0 ? FilterManyWildcards : null; - } - if ( details.anchor < 0 ) { - return FilterSingleWildcardLeftAnchored; - } - if ( details.anchor > 0 ) { - return FilterSingleWildcardRightAnchored; - } - if ( details.tokenBeg === 0 ) { - return FilterSingleWildcardPrefix0; - } - return FilterSingleWildcard; + return FilterGeneric; } if ( details.anchor < 0 ) { return FilterPlainLeftAnchored; @@ -1448,24 +1113,11 @@ var getHostnameBasedFilterClass = function(details) { return FilterRegexHostname; } var s = details.f; - var wcOffset = s.indexOf('*'); - if ( wcOffset !== -1 ) { + if ( s.indexOf('*') !== -1 ) { if ( details.hostnameAnchored ) { return FilterGenericHnAnchoredHostname; } - if ( s.indexOf('*', wcOffset + 1) !== -1 ) { - return details.anchor === 0 ? FilterManyWildcardsHostname : null; - } - if ( details.anchor < 0 ) { - return FilterSingleWildcardLeftAnchoredHostname; - } - if ( details.anchor > 0 ) { - return FilterSingleWildcardRightAnchoredHostname; - } - if ( details.tokenBeg === 0 ) { - return FilterSingleWildcardPrefix0Hostname; - } - return FilterSingleWildcardHostname; + return FilterGenericHostname; } if ( details.anchor < 0 ) { return FilterPlainLeftAnchoredHostname; @@ -1859,6 +1511,7 @@ FilterContainer.prototype.reset = function() { this.duplicateBuster = {}; this.categories = Object.create(null); this.filterParser.reset(); + this.filterCounts = {}; }; /******************************************************************************/ @@ -1894,20 +1547,12 @@ FilterContainer.prototype.factories = { '|ah': FilterPlainLeftAnchoredHostname, 'a|': FilterPlainRightAnchored, 'a|h': FilterPlainRightAnchoredHostname, - 'h|a': FilterPlainHnAnchored, - '*': FilterSingleWildcard, - '*h': FilterSingleWildcardHostname, - '0*': FilterSingleWildcardPrefix0, - '0*h': FilterSingleWildcardPrefix0Hostname, - '|*': FilterSingleWildcardLeftAnchored, - '|*h': FilterSingleWildcardLeftAnchoredHostname, - '*|': FilterSingleWildcardRightAnchored, - '*|h': FilterSingleWildcardRightAnchoredHostname, - '*+': FilterManyWildcards, - '*+h': FilterManyWildcardsHostname, + '||a': FilterPlainHnAnchored, '//': FilterRegex, '//h': FilterRegexHostname, '{h}': FilterHostnameDict, + '_': FilterGeneric, + '_h': FilterGenericHostname, '||_': FilterGenericHnAnchored, '||_h': FilterGenericHnAnchoredHostname }; @@ -2239,6 +1884,14 @@ FilterContainer.prototype.fromCompiledContent = function(text, lineBeg) { this.duplicateBuster[line] = true; factory = this.factories[fields[2]]; + + // For development purpose + //if ( this.filterCounts.hasOwnProperty(fields[2]) === false ) { + // this.filterCounts[fields[2]] = 1; + //} else { + // this.filterCounts[fields[2]]++; + //} + filter = factory.fromSelfie(fields[3]); if ( entry === undefined ) { bucket[fields[1]] = filter; -- cgit v1.1