Skip to content
This repository was archived by the owner on Mar 24, 2022. It is now read-only.

Commit 758f13c

Browse files
committed
refactor: Merge LocationInfoTokenizerMixin into Tokenizer
1 parent 5639c01 commit 758f13c

14 files changed

Lines changed: 218 additions & 309 deletions

File tree

packages/html-rewriting-stream/lib/index.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import { EndTag, StartTag, Doctype, Text, Comment, SaxToken } from '../../sax-pa
22
import type { Token, Location } from '@parse5/parse5/lib/common/token.js';
33
import { SAXParser } from '@parse5/sax-parser/lib/index.js';
44
import { escapeString } from '@parse5/parse5/lib/serializer/index.js';
5-
import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js';
65

76
/**
87
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
@@ -54,13 +53,9 @@ import type { Preprocessor } from '@parse5/parse5/lib/tokenizer/preprocessor.js'
5453
* ```
5554
*/
5655
export class RewritingStream extends SAXParser {
57-
posTracker: Preprocessor;
58-
5956
/** Note: The `sourceCodeLocationInfo` is always enabled. */
6057
constructor() {
6158
super({ sourceCodeLocationInfo: true });
62-
63-
this.posTracker = this.locInfoMixin!.posTracker;
6459
}
6560

6661
override _transformChunk(chunk: string) {
@@ -71,7 +66,7 @@ export class RewritingStream extends SAXParser {
7166
}
7267

7368
_getRawHtml(location: Location) {
74-
const { droppedBufferSize } = this.posTracker;
69+
const { droppedBufferSize } = this.tokenizer!.preprocessor;
7570
const start = location.startOffset - droppedBufferSize;
7671
const end = location.endOffset - droppedBufferSize;
7772

packages/parse5/lib/common/token.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ export interface Location {
2626
}
2727

2828
export interface LocationWithAttributes extends Location {
29-
attrs: Record<string, Location>;
29+
attrs?: Record<string, Location>;
3030
}
3131

3232
interface TokenBase {
3333
readonly type: TokenType;
34-
location?: Location;
34+
location: Location | null;
3535
}
3636

3737
export interface DoctypeToken extends TokenBase {
@@ -59,7 +59,7 @@ export interface TagToken extends TokenBase {
5959
selfClosing: boolean;
6060
ackSelfClosing: boolean;
6161
attrs: Attribute[];
62-
location?: LocationWithAttributes;
62+
location: LocationWithAttributes | null;
6363
}
6464

6565
export function getTokenAttr(token: TagToken, attrName: string) {

packages/parse5/lib/extensions/error-reporting/mixin-base.ts

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,18 @@ export abstract class ErrorReportingMixinBase<Host extends ClassWithErrorReporti
3333
this.onParseError = opts.onParseError;
3434
}
3535

36-
_setErrorLocation(err: ParserError) {
37-
err.startLine = err.endLine = this.posTracker.line;
38-
err.startCol = err.endCol = this.posTracker.col;
39-
err.startOffset = err.endOffset = this.posTracker.offset;
40-
}
41-
4236
_reportError(code: ERR) {
43-
const err = { ...BASE_ERROR, code };
44-
45-
this._setErrorLocation(err);
46-
this.onParseError(err);
37+
const { line, col, offset } = this.posTracker;
38+
39+
this.onParseError({
40+
code,
41+
startLine: line,
42+
endLine: line,
43+
startCol: col,
44+
endCol: col,
45+
startOffset: offset,
46+
endOffset: offset,
47+
});
4748
}
4849

4950
override _getOverriddenMethods(mxn: ErrorReportingMixinBase<Host>, _originalMethods: Host): Partial<Host> {

packages/parse5/lib/extensions/error-reporting/parser-mixin.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { ErrorReportingMixinOptions, BASE_ERROR } from './mixin-base.js';
22
import { ErrorReportingTokenizerMixin } from './tokenizer-mixin.js';
3-
import { LocationInfoTokenizerMixin } from '../location-info/tokenizer-mixin.js';
43
import { Mixin } from '../../utils/mixin.js';
54
import type { Location, Token } from '../../common/token.js';
65
import type { Parser } from '../../parser/index.js';
@@ -39,7 +38,6 @@ export class ErrorReportingParserMixin<T extends TreeAdapterTypeMap> extends Mix
3938
orig._bootstrap.call(this, document, fragmentContext);
4039

4140
Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts);
42-
Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
4341
},
4442

4543
_processInputToken(token: Token) {

packages/parse5/lib/extensions/location-info/parser-mixin.ts

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import { CommentToken, DoctypeToken, CharacterToken } from '../../common/token';
22
import { Mixin } from '../../utils/mixin.js';
3-
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
43
import { TAG_NAMES as $, NAMESPACES as NS } from '../../common/html.js';
54
import type { TreeAdapter, TreeAdapterTypeMap, ElementLocation } from '../../tree-adapters/interface';
6-
import type { Preprocessor } from './../../tokenizer/preprocessor.js';
75
import type { Parser } from '../../parser/index.js';
86
import { TokenType, Token, TagToken } from '../../common/token.js';
97

108
export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
119
treeAdapter: TreeAdapter<T>;
12-
posTracker: Preprocessor | null = null;
1310
lastStartTagToken: null | TagToken = null;
1411
lastFosterParentingLocation: null | ReturnType<Parser<T>['_findFosterParentingLocation']> = null;
1512
currentToken: Token | null = null;
@@ -26,7 +23,7 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
2623
if (this.lastStartTagToken) {
2724
loc = {
2825
...this.lastStartTagToken.location!,
29-
startTag: this.lastStartTagToken.location,
26+
startTag: this.lastStartTagToken.location!,
3027
};
3128
}
3229

@@ -70,9 +67,6 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
7067
mxn.lastFosterParentingLocation = null;
7168
mxn.currentToken = null;
7269

73-
const tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);
74-
75-
mxn.posTracker = tokenizerMixin.posTracker;
7670
this.openElements.onItemPop = (element) => mxn._setEndLocation(element, mxn.currentToken!);
7771
},
7872

@@ -98,11 +92,10 @@ export class LocationInfoParserMixin<T extends TreeAdapterTypeMap> extends Mixin
9892

9993
//NOTE: <body> and <html> are never popped from the stack, so we need to update
10094
//their end location explicitly.
101-
const requireExplicitUpdate =
95+
if (
10296
token.type === TokenType.END_TAG &&
103-
(token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)));
104-
105-
if (requireExplicitUpdate) {
97+
(token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)))
98+
) {
10699
for (let i = this.openElements.stackTop; i >= 0; i--) {
107100
const element = this.openElements.items[i];
108101

packages/parse5/lib/extensions/location-info/tokenizer-mixin.test.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import * as assert from 'assert';
22
import { Tokenizer, TokenizerMode } from '../../tokenizer/index.js';
3-
import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js';
4-
import { Mixin } from '../../utils/mixin.js';
53
import { TokenType } from './../../common/token.js';
64
import { getSubstringByLineCol, normalizeNewLine } from '../../../../../test/utils/common.js';
75

@@ -84,11 +82,9 @@ it('Location Info (Tokenizer)', () => {
8482
testCases.forEach((testCase) => {
8583
const html = testCase.htmlChunks.join('');
8684
const lines = html.split(/\r?\n/g);
87-
const tokenizer = new Tokenizer();
85+
const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });
8886
const lastChunkIdx = testCase.htmlChunks.length - 1;
8987

90-
Mixin.install(tokenizer, LocationInfoTokenizerMixin);
91-
9288
for (let i = 0; i < testCase.htmlChunks.length; i++) {
9389
tokenizer.write(testCase.htmlChunks[i], i === lastChunkIdx);
9490
}

packages/parse5/lib/extensions/location-info/tokenizer-mixin.ts

Lines changed: 0 additions & 143 deletions
This file was deleted.

packages/parse5/lib/parser/formatting-element-list.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => {
1212
ackSelfClosing: false,
1313
selfClosing: false,
1414
attrs: [],
15+
location: null,
1516
};
1617
}
1718

packages/parse5/lib/parser/index.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,13 @@ export class Parser<T extends TreeAdapterTypeMap> {
137137
this.treeAdapter = this.options.treeAdapter!;
138138
this.pendingScript = null;
139139

140-
if (this.options.sourceCodeLocationInfo) {
141-
Mixin.install(this, LocationInfoParserMixin as any);
142-
}
143-
144140
if (this.options.onParseError) {
145141
Mixin.install(this, ErrorReportingParserMixin as any, { onParseError: this.options.onParseError });
142+
this.options.sourceCodeLocationInfo = true;
143+
}
144+
145+
if (this.options.sourceCodeLocationInfo) {
146+
Mixin.install(this, LocationInfoParserMixin as any);
146147
}
147148
}
148149

@@ -219,7 +220,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
219220

220221
//Bootstrap parser
221222
_bootstrap(document: T['document'], fragmentContext: T['element'] | null) {
222-
this.tokenizer = new Tokenizer();
223+
this.tokenizer = new Tokenizer(this.options);
223224

224225
this.stopped = false;
225226

packages/parse5/lib/tokenizer/index.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { generateTokenizationTests } from '../../../../test/utils/generate-token
77
const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);
88

99
generateTokenizationTests('tokenizer', 'Tokenizer', dataPath.pathname, ({ errors }) => {
10-
const tokenizer = new Tokenizer();
10+
const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });
1111

1212
Mixin.install(tokenizer, ErrorReportingTokenizerMixin, {
1313
onParseError(err) {

0 commit comments

Comments
 (0)