Skip to content
This repository was archived by the owner on Mar 24, 2022. It is now read-only.

Commit 3ab6403

Browse files
committed
refactor: Remove PositionTrackingPreprocessorMixin
1 parent 1de6e63 commit 3ab6403

8 files changed

Lines changed: 55 additions & 79 deletions

File tree

packages/html-rewriting-stream/lib/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { EndTag, StartTag, Doctype, Text, Comment, SaxToken } from '../../sax-pa
22
import type { Token, Location } from '@parse5/parse5/lib/common/token.js';
33
import { SAXParser } from '@parse5/sax-parser/lib/index.js';
44
import { escapeString } from '@parse5/parse5/lib/serializer/index.js';
5-
import type { PositionTrackingPreprocessorMixin } from '@parse5/parse5/lib/extensions/position-tracking/preprocessor-mixin';
5+
import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js';
66

77
/**
88
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
@@ -54,7 +54,7 @@ import type { PositionTrackingPreprocessorMixin } from '@parse5/parse5/lib/exten
5454
* ```
5555
*/
5656
export class RewritingStream extends SAXParser {
57-
posTracker: PositionTrackingPreprocessorMixin;
57+
posTracker: Preprocessor;
5858

5959
/** Note: The `sourceCodeLocationInfo` is always enabled. */
6060
constructor() {

packages/parse5/lib/common/unicode.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,26 +51,26 @@ export const CODE_POINT_SEQUENCES = {
5151
};
5252

5353
//Surrogates
54-
export function isSurrogate(cp: number) {
54+
export function isSurrogate(cp: number): boolean {
5555
return cp >= 0xd800 && cp <= 0xdfff;
5656
}
5757

58-
export function isSurrogatePair(cp: number) {
58+
export function isSurrogatePair(cp: number): boolean {
5959
return cp >= 0xdc00 && cp <= 0xdfff;
6060
}
6161

62-
export function getSurrogatePairCodePoint(cp1: number, cp2: number) {
62+
export function getSurrogatePairCodePoint(cp1: number, cp2: number): number {
6363
return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2;
6464
}
6565

6666
//NOTE: excluding NULL and ASCII whitespace
67-
export function isControlCodePoint(cp: number) {
67+
export function isControlCodePoint(cp: number): boolean {
6868
return (
6969
(cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
7070
(cp >= 0x7f && cp <= 0x9f)
7171
);
7272
}
7373

74-
export function isUndefinedCodePoint(cp: number) {
74+
export function isUndefinedCodePoint(cp: number): boolean {
7575
return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.has(cp);
7676
}

packages/parse5/lib/extensions/error-reporting/preprocessor-mixin.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
import { ErrorReportingMixinBase, ErrorReportingMixinOptions } from './mixin-base.js';
2-
import { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin.js';
3-
import { Mixin } from '../../utils/mixin.js';
42
import type { Preprocessor } from '../../tokenizer/preprocessor.js';
53
import type { ERR } from '../../common/error-codes.js';
64

75
export class ErrorReportingPreprocessorMixin extends ErrorReportingMixinBase<Preprocessor> {
8-
posTracker: PositionTrackingPreprocessorMixin;
6+
posTracker: Preprocessor;
97
lastErrOffset = -1;
108

119
constructor(preprocessor: Preprocessor, opts: ErrorReportingMixinOptions) {
1210
super(preprocessor, opts);
1311

14-
this.posTracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin);
12+
this.posTracker = preprocessor;
1513
}
1614

1715
override _reportError(code: ERR) {

packages/parse5/lib/extensions/location-info/parser-mixin.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ import { Mixin } from '../../utils/mixin.js';
33
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
44
import { TAG_NAMES as $, NAMESPACES as NS } from '../../common/html.js';
55
import type { TreeAdapter, TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
6+
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
67
import type { Parser } from '../../parser/index.js';
7-
import type { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin';
88
import { TokenType, Token, TagToken } from '../../common/token.js';
99

1010
export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
1111
treeAdapter: TreeAdapter<T>;
12-
posTracker: PositionTrackingPreprocessorMixin | null = null;
12+
posTracker: Preprocessor | null = null;
1313
lastStartTagToken: null | TagToken = null;
1414
lastFosterParentingLocation: null | ReturnType<Parser<T>['_findFosterParentingLocation']> = null;
1515
currentToken: Token | null = null;

packages/parse5/lib/extensions/location-info/tokenizer-mixin.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import { Mixin } from '../../utils/mixin.js';
22
import { Tokenizer } from '../../tokenizer/index.js';
3-
import { PositionTrackingPreprocessorMixin } from '../position-tracking/preprocessor-mixin.js';
3+
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
44
import { TokenType, Location, LocationWithAttributes } from '../../common/token.js';
55

66
export class LocationInfoTokenizerMixin extends Mixin<Tokenizer> {
7-
posTracker: PositionTrackingPreprocessorMixin;
7+
posTracker: Preprocessor;
88
currentAttrLocation: Location | null = null;
99
ctLoc: Location | null = null;
1010

1111
constructor(private tokenizer: Tokenizer) {
1212
super(tokenizer);
1313

14-
this.posTracker = Mixin.install(tokenizer.preprocessor, PositionTrackingPreprocessorMixin);
14+
this.posTracker = tokenizer.preprocessor;
1515
}
1616

1717
_getCurrentLocation(): Location {

packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.ts

Lines changed: 0 additions & 55 deletions
This file was deleted.

packages/parse5/lib/tokenizer/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ export class Tokenizer {
460460
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
461461
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
462462
//3)TokenType.CHARACTER - any character sequence which doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
463-
_appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string) {
463+
private _appendCharToCurrentCharacterToken(type: CharacterToken['type'], ch: string) {
464464
if (this.currentCharacterToken && this.currentCharacterToken.type !== type) {
465465
this._emitCurrentCharacterToken();
466466
}
@@ -472,7 +472,7 @@ export class Tokenizer {
472472
}
473473
}
474474

475-
_emitCodePoint(cp: number) {
475+
private _emitCodePoint(cp: number) {
476476
let type = TokenType.CHARACTER;
477477

478478
if (isWhitespace(cp)) {

packages/parse5/lib/tokenizer/preprocessor.ts

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,25 @@ const DEFAULT_BUFFER_WATERLINE = 1 << 16;
1212
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
1313
export class Preprocessor {
1414
html: string | null = null;
15-
pos = -1;
16-
lastGapPos = -1;
17-
lastCharPos = -1;
18-
gapStack: number[] = [];
19-
skipNextNewLine = false;
20-
lastChunkWritten = false;
15+
private pos = -1;
16+
private lastGapPos = -1;
17+
private lastCharPos = -1;
18+
private gapStack: number[] = [];
19+
private skipNextNewLine = false;
20+
private lastChunkWritten = false;
2121
endOfChunkHit = false;
2222
bufferWaterline = DEFAULT_BUFFER_WATERLINE;
2323

24+
private isEol = false;
25+
lineStartPos = 0;
26+
droppedBufferSize = 0;
27+
col = 0;
28+
line = 1;
29+
30+
get offset(): number {
31+
return this.droppedBufferSize + this.pos;
32+
}
33+
2434
_err(_err: string) {
2535
// NOTE: err reporting is noop by default. Enabled by mixin.
2636
}
@@ -59,13 +69,20 @@ export class Preprocessor {
5969
}
6070

6171
dropParsedChunk() {
72+
const prevPos = this.pos;
73+
6274
if (this.pos > this.bufferWaterline) {
6375
this.lastCharPos -= this.pos;
6476
this.html = this.html!.substring(this.pos);
6577
this.pos = 0;
6678
this.lastGapPos = -1;
6779
this.gapStack = [];
6880
}
81+
82+
const reduction = prevPos - this.pos;
83+
84+
this.lineStartPos -= reduction;
85+
this.droppedBufferSize += reduction;
6986
}
7087

7188
write(chunk: string, isLastChunk: boolean) {
@@ -91,13 +108,26 @@ export class Preprocessor {
91108
advance(): number {
92109
this.pos++;
93110

111+
//NOTE: LF should be in the last column of the line
112+
if (this.isEol) {
113+
this.isEol = false;
114+
this.line++;
115+
this.lineStartPos = this.pos;
116+
}
117+
118+
this.col = this.pos - this.lineStartPos + 1;
119+
94120
if (this.pos > this.lastCharPos) {
95121
this.endOfChunkHit = !this.lastChunkWritten;
96122
return $.EOF;
97123
}
98124

99125
let cp = this.html!.charCodeAt(this.pos);
100126

127+
if (cp === $.LINE_FEED || (cp === $.CARRIAGE_RETURN && this.html!.charCodeAt(this.pos + 1) !== $.LINE_FEED)) {
128+
this.isEol = true;
129+
}
130+
101131
//NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
102132
//must be ignored.
103133
if (this.skipNextNewLine && cp === $.LINE_FEED) {
@@ -146,5 +176,8 @@ export class Preprocessor {
146176
}
147177

148178
this.pos--;
179+
180+
this.isEol = false;
181+
this.col = this.pos - this.lineStartPos + 1;
149182
}
150183
}

0 commit comments

Comments
 (0)