Skip to content

Commit 3fab7bf

Browse files
committed
Add support for "specific-generic" cosmetic filters
A specific cosmetic filter of the form `*##.selector` will be unconditionally injected into all web pages, whereas a cosmetic filter of the form `##.selector` would be injected only when uBO's DOM surveyor finds at least one matching element in a web page. The new specific-generic form will also be disabled when a web page is subject to a `generichide` exception filter, since the filter is essentially a generic one -- the only difference from the usual generic form is that the filter is injected unconditionally instead of through the DOM surveyor. Specific-generic cosmetic filters will NOT be discarded when checking the "Ignore generic cosmetic filters" option in the "Filter lists" pane -- since the purpose of this option is primarily to disable the DOM surveyor. Specific-generic cosmetic filters should be used parsimoniously and only when using a normal specific filter is really impractical. Related issue: - uBlockOrigin/uBlock-issues#803
1 parent 32b9db6 commit 3fab7bf

File tree

3 files changed

+63
-41
lines changed

3 files changed

+63
-41
lines changed

src/js/cosmetic-filtering.js

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ const FilterContainer = function() {
191191
this.rePlainSelectorEscaped = /^[#.](?:\\[0-9A-Fa-f]+ |\\.|\w|-)+/;
192192
this.rePlainSelectorEx = /^[^#.\[(]+([#.][\w-]+)|([#.][\w-]+)$/;
193193
this.reEscapeSequence = /\\([0-9A-Fa-f]+ |.)/g;
194-
this.reSimpleHighGeneric1 = /^[a-z]*\[[^[]+]$/;
194+
this.reSimpleHighGeneric = /^(?:[a-z]*\[[^\]]+\]|\S+)$/;
195195
this.reHighMedium = /^\[href\^="https?:\/\/([^"]{8})[^"]*"\]$/;
196196

197197
this.selectorCache = new Map();
@@ -274,8 +274,8 @@ FilterContainer.prototype.reset = function() {
274274
this.selectorCacheTimer = null;
275275
}
276276

277-
// generic filters
278-
this.hasGenericHide = false;
277+
// whether there is at least one surveyor-based filter
278+
this.needDOMSurveyor = false;
279279

280280
// hostname, entity-based filters
281281
this.specificFilters.clear();
@@ -301,13 +301,11 @@ FilterContainer.prototype.freeze = function() {
301301
this.duplicateBuster.clear();
302302
this.specificFilters.collectGarbage();
303303

304-
this.hasGenericHide =
304+
this.needDOMSurveyor =
305305
this.lowlyGeneric.id.simple.size !== 0 ||
306306
this.lowlyGeneric.id.complex.size !== 0 ||
307307
this.lowlyGeneric.cl.simple.size !== 0 ||
308-
this.lowlyGeneric.cl.complex.size !== 0 ||
309-
this.highlyGeneric.simple.dict.size !== 0 ||
310-
this.highlyGeneric.complex.dict.size !== 0;
308+
this.lowlyGeneric.cl.complex.size !== 0;
311309

312310
this.highlyGeneric.simple.str = Array.from(this.highlyGeneric.simple.dict).join(',\n');
313311
this.highlyGeneric.simple.mru.reset();
@@ -333,8 +331,8 @@ FilterContainer.prototype.keyFromSelector = function(selector) {
333331
matches = this.rePlainSelectorEscaped.exec(selector);
334332
if ( matches === null ) { return; }
335333
key = '';
336-
let escaped = matches[0],
337-
beg = 0;
334+
const escaped = matches[0];
335+
let beg = 0;
338336
this.reEscapeSequence.lastIndex = 0;
339337
for (;;) {
340338
matches = this.reEscapeSequence.exec(escaped);
@@ -402,22 +400,19 @@ FilterContainer.prototype.compileGenericHideSelector = function(
402400
const type = selector.charCodeAt(0);
403401
let key;
404402

403+
// Simple selector-based CSS rule: no need to test for whether the
404+
// selector is valid, the regex took care of this. Most generic selector
405+
// falls into that category:
406+
// - ###ad-bigbox
407+
// - ##.ads-bigbox
405408
if ( type === 0x23 /* '#' */ ) {
406409
key = this.keyFromSelector(selector);
407-
// Simple selector-based CSS rule: no need to test for whether the
408-
// selector is valid, the regex took care of this. Most generic
409-
// selector falls into that category.
410-
// - ###ad-bigbox
411410
if ( key === selector ) {
412411
writer.push([ 0, key.slice(1) ]);
413412
return;
414413
}
415414
} else if ( type === 0x2E /* '.' */ ) {
416415
key = this.keyFromSelector(selector);
417-
// Simple selector-based CSS rule: no need to test for whether the
418-
// selector is valid, the regex took care of this. Most generic
419-
// selector falls into that category.
420-
// - ##.ads-bigbox
421416
if ( key === selector ) {
422417
writer.push([ 2, key.slice(1) ]);
423418
return;
@@ -484,12 +479,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
484479
// For efficiency purpose, we will distinguish between simple and complex
485480
// selectors.
486481

487-
if ( this.reSimpleHighGeneric1.test(selector) ) {
488-
writer.push([ 4 /* simple */, selector ]);
489-
return;
490-
}
491-
492-
if ( selector.indexOf(' ') === -1 ) {
482+
if ( this.reSimpleHighGeneric.test(selector) ) {
493483
writer.push([ 4 /* simple */, selector ]);
494484
} else {
495485
writer.push([ 5 /* complex */, selector ]);
@@ -551,10 +541,13 @@ FilterContainer.prototype.compileSpecificSelector = function(
551541

552542
let kind = 0;
553543
if ( unhide === 1 ) {
554-
kind |= 0b01; // Exception
544+
kind |= 0b001; // Exception
555545
}
556546
if ( compiled.charCodeAt(0) === 0x7B /* '{' */ ) {
557-
kind |= 0b10; // Procedural
547+
kind |= 0b010; // Procedural
548+
}
549+
if ( hostname === '*' ) {
550+
kind |= 0b100; // Applies everywhere
558551
}
559552

560553
writer.push([ 8, hostname, kind, compiled ]);
@@ -637,8 +630,21 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
637630

638631
// hash, example.com, .promoted-tweet
639632
// hash, example.*, .promoted-tweet
633+
//
634+
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
635+
// Handle specific filters meant to apply everywhere, i.e. selectors
636+
// not to be injected conditionally through the DOM surveyor.
637+
// hash, *, .promoted-tweet
640638
case 8:
641-
this.specificFilters.store(args[1], args[2], args[3]);
639+
if ( args[2] === 0b100 ) {
640+
if ( this.reSimpleHighGeneric.test(args[3]) )
641+
this.highlyGeneric.simple.dict.add(args[3]);
642+
else {
643+
this.highlyGeneric.complex.dict.add(args[3]);
644+
}
645+
break;
646+
}
647+
this.specificFilters.store(args[1], args[2] & 0b011, args[3]);
642648
break;
643649

644650
default:
@@ -666,11 +672,21 @@ FilterContainer.prototype.skipGenericCompiledContent = function(reader) {
666672

667673
switch ( args[0] ) {
668674

669-
// hash, example.com, .promoted-tweet
670-
// hash, example.*, .promoted-tweet
675+
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
676+
// Handle specific filters meant to apply everywhere, i.e. selectors
677+
// not to be injected conditionally through the DOM surveyor.
678+
// hash, *, .promoted-tweet
671679
case 8:
672680
this.duplicateBuster.add(fingerprint);
673-
this.specificFilters.store(args[1], args[2], args[3]);
681+
if ( args[2] === 0b100 ) {
682+
if ( this.reSimpleHighGeneric.test(args[3]) )
683+
this.highlyGeneric.simple.dict.add(args[3]);
684+
else {
685+
this.highlyGeneric.complex.dict.add(args[3]);
686+
}
687+
break;
688+
}
689+
this.specificFilters.store(args[1], args[2] & 0b011, args[3]);
674690
break;
675691

676692
default:
@@ -699,7 +715,6 @@ FilterContainer.prototype.toSelfie = function() {
699715
acceptedCount: this.acceptedCount,
700716
discardedCount: this.discardedCount,
701717
specificFilters: this.specificFilters.toSelfie(),
702-
hasGenericHide: this.hasGenericHide,
703718
lowlyGenericSID: Array.from(this.lowlyGeneric.id.simple),
704719
lowlyGenericCID: Array.from(this.lowlyGeneric.id.complex),
705720
lowlyGenericSCL: Array.from(this.lowlyGeneric.cl.simple),
@@ -715,7 +730,6 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
715730
this.acceptedCount = selfie.acceptedCount;
716731
this.discardedCount = selfie.discardedCount;
717732
this.specificFilters.fromSelfie(selfie.specificFilters);
718-
this.hasGenericHide = selfie.hasGenericHide;
719733
this.lowlyGeneric.id.simple = new Set(selfie.lowlyGenericSID);
720734
this.lowlyGeneric.id.complex = new Map(selfie.lowlyGenericCID);
721735
this.lowlyGeneric.cl.simple = new Set(selfie.lowlyGenericSCL);
@@ -724,6 +738,11 @@ FilterContainer.prototype.fromSelfie = function(selfie) {
724738
this.highlyGeneric.simple.str = selfie.highSimpleGenericHideArray.join(',\n');
725739
this.highlyGeneric.complex.dict = new Set(selfie.highComplexGenericHideArray);
726740
this.highlyGeneric.complex.str = selfie.highComplexGenericHideArray.join(',\n');
741+
this.needDOMSurveyor =
742+
selfie.lowlyGenericSID.length !== 0 ||
743+
selfie.lowlyGenericCID.length !== 0 ||
744+
selfie.lowlyGenericSCL.length !== 0 ||
745+
selfie.lowlyGenericCCL.length !== 0;
727746
this.frozen = true;
728747
};
729748

@@ -986,7 +1005,7 @@ FilterContainer.prototype.retrieveSpecificSelectors = function(
9861005
highGenericHideComplex: '',
9871006
injectedHideFilters: '',
9881007
networkFilters: '',
989-
noDOMSurveying: this.hasGenericHide === false,
1008+
noDOMSurveying: this.needDOMSurveyor === false,
9901009
proceduralFilters: []
9911010
};
9921011

src/js/reverselookup-worker.js

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -128,22 +128,25 @@ const fromCosmeticFilter = function(details) {
128128
return a.length > b.length ? a : b;
129129
});
130130

131-
const regexFromLabels = (hn, suffix) =>
131+
const regexFromLabels = (prefix, hn, suffix) =>
132132
new RegExp(
133-
'^' +
133+
prefix +
134134
hn.split('.').reduce((acc, item) => `(${acc}\\.)?${item}`) +
135135
suffix
136136
);
137137

138-
const reHostname = regexFromLabels(hostname, '$');
138+
// https://github.com/uBlockOrigin/uBlock-issues/issues/803
139+
// Support looking up selectors of the form `*##...`
140+
const reHostname = regexFromLabels('^', hostname, '$');
139141
let reEntity;
140142
{
141143
const domain = details.domain;
142144
const pos = domain.indexOf('.');
143145
if ( pos !== -1 ) {
144146
reEntity = regexFromLabels(
147+
'^(',
145148
hostname.slice(0, pos + hostname.length - domain.length),
146-
'\\.\\*$'
149+
'\\.)?\\*$'
147150
);
148151
}
149152
}
@@ -218,8 +221,8 @@ const fromCosmeticFilter = function(details) {
218221
case 8:
219222
// HTML filtering
220223
case 64:
221-
if ( exception !== ((fargs[2] & 0b01) !== 0) ) { break; }
222-
isProcedural = (fargs[2] & 0b10) !== 0;
224+
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
225+
isProcedural = (fargs[2] & 0b010) !== 0;
223226
if (
224227
isProcedural === false && fargs[3] !== selector ||
225228
isProcedural && JSON.parse(fargs[3]).raw !== selector
@@ -240,7 +243,7 @@ const fromCosmeticFilter = function(details) {
240243
break;
241244
// Scriptlet injection
242245
case 32:
243-
if ( exception !== ((fargs[2] & 1) !== 0) ) { break; }
246+
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
244247
if ( fargs[3] !== selector ) { break; }
245248
if ( hostnameMatches(fargs[1]) ) {
246249
found = fargs[1] + prefix + selector;

src/js/static-ext-filtering.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
const reParseRegexLiteral = /^\/(.+)\/([imu]+)?$/;
5757
const emptyArray = [];
5858
const parsed = {
59-
hostnames: [],
6059
exception: false,
60+
hostnames: [],
6161
suffix: ''
6262
};
6363

@@ -871,7 +871,7 @@
871871
}
872872
}
873873

874-
let c0 = suffix.charCodeAt(0);
874+
const c0 = suffix.charCodeAt(0);
875875

876876
// New shorter syntax for scriptlet injection engine.
877877
if ( c0 === 0x2B /* '+' */ && suffix.startsWith('+js') ) {

0 commit comments

Comments
 (0)