Skip to content
This repository was archived by the owner on Mar 24, 2022. It is now read-only.

Commit 54e1feb

Browse files
committed
refactor: Inline preprocessor & tokenizer error mixins
1 parent 758f13c commit 54e1feb

10 files changed

Lines changed: 67 additions & 129 deletions

File tree

packages/parse5/lib/common/error-codes.ts

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,11 @@
1+
import { Location } from './token.js';
2+
3+
export interface ParserError extends Location {
4+
code: ERR;
5+
}
6+
7+
export type ParserErrorHandler = (error: ParserError) => void;
8+
19
export enum ERR {
210
controlCharacterInInputStream = 'control-character-in-input-stream',
311
noncharacterInInputStream = 'noncharacter-in-input-stream',

packages/parse5/lib/extensions/error-reporting/mixin-base.ts

Lines changed: 0 additions & 57 deletions
This file was deleted.

packages/parse5/lib/extensions/error-reporting/parser-mixin.ts

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,21 @@
1-
import { ErrorReportingMixinOptions, BASE_ERROR } from './mixin-base.js';
2-
import { ErrorReportingTokenizerMixin } from './tokenizer-mixin.js';
31
import { Mixin } from '../../utils/mixin.js';
42
import type { Location, Token } from '../../common/token.js';
53
import type { Parser } from '../../parser/index.js';
64
import type { TreeAdapterTypeMap } from '../../tree-adapters/interface.js';
7-
import type { ERR } from '../../common/error-codes.js';
5+
import type { ERR, ParserErrorHandler } from '../../common/error-codes.js';
86

97
export class ErrorReportingParserMixin<T extends TreeAdapterTypeMap> extends Mixin<Parser<T>> {
10-
private onParseError: ErrorReportingMixinOptions['onParseError'];
8+
private onParseError: ParserErrorHandler;
119
ctLoc: null | Location = null;
1210
locBeforeToken = false;
1311

14-
constructor(parser: Parser<T>, private opts: ErrorReportingMixinOptions) {
12+
constructor(private parser: Parser<T>, opts: { onParseError: ParserErrorHandler }) {
1513
super(parser);
1614
this.onParseError = opts.onParseError;
1715
}
1816

1917
_reportError(code: ERR) {
20-
const err = { ...BASE_ERROR, code };
18+
const err = this.parser.tokenizer!.preprocessor.getError(code);
2119

2220
if (this.ctLoc) {
2321
err.startLine = this.ctLoc.startLine;
@@ -36,8 +34,6 @@ export class ErrorReportingParserMixin<T extends TreeAdapterTypeMap> extends Mix
3634
return {
3735
_bootstrap(this: Parser<T>, document: T['document'], fragmentContext: T['element'] | null) {
3836
orig._bootstrap.call(this, document, fragmentContext);
39-
40-
Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts);
4137
},
4238

4339
_processInputToken(token: Token) {

packages/parse5/lib/extensions/error-reporting/preprocessor-mixin.ts

Lines changed: 0 additions & 22 deletions
This file was deleted.

packages/parse5/lib/extensions/error-reporting/tokenizer-mixin.ts

Lines changed: 0 additions & 21 deletions
This file was deleted.

packages/parse5/lib/parser/index.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,17 @@ import {
1717
DOCUMENT_MODE,
1818
isNumberedHeader,
1919
} from '../common/html.js';
20-
import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface';
21-
import type { ParserError } from '../extensions/error-reporting/mixin-base';
22-
import { TokenType, getTokenAttr, Token, CommentToken, CharacterToken, TagToken, DoctypeToken } from '../common/token';
20+
import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
21+
import type { ParserError } from '../common/error-codes.js';
22+
import {
23+
TokenType,
24+
getTokenAttr,
25+
Token,
26+
CommentToken,
27+
CharacterToken,
28+
TagToken,
29+
DoctypeToken,
30+
} from '../common/token.js';
2331

2432
//Misc constants
2533
const HIDDEN_INPUT_TYPE = 'hidden';

packages/parse5/lib/tokenizer/index.test.ts

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,12 @@
11
import * as parse5 from '../index.js';
22
import { Tokenizer } from './index.js';
3-
import { Mixin } from '../utils/mixin.js';
4-
import { ErrorReportingTokenizerMixin } from '../extensions/error-reporting/tokenizer-mixin.js';
53
import { generateTokenizationTests } from '../../../../test/utils/generate-tokenization-tests.js';
64

75
const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);
86

97
generateTokenizationTests('tokenizer', 'Tokenizer', dataPath.pathname, ({ errors }) => {
10-
const tokenizer = new Tokenizer({ sourceCodeLocationInfo: true });
11-
12-
Mixin.install(tokenizer, ErrorReportingTokenizerMixin, {
8+
const tokenizer = new Tokenizer({
9+
sourceCodeLocationInfo: true,
1310
onParseError(err) {
1411
errors.push({
1512
code: err.code,

packages/parse5/lib/tokenizer/index.ts

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ import {
1919
Location,
2020
} from '../common/token.js';
2121
import { namedEntityData as neTree } from './named-entity-data.js';
22-
import { ERR } from '../common/error-codes.js';
22+
import { ERR, ParserErrorHandler } from '../common/error-codes.js';
2323

2424
//C1 Unicode control character reference replacements
2525
const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([
@@ -224,7 +224,7 @@ function findNamedEntityTreeBranch(nodeIx: number, cp: number): number {
224224

225225
//Tokenizer
226226
export class Tokenizer {
227-
preprocessor = new Preprocessor();
227+
preprocessor: Preprocessor;
228228

229229
tokenQueue: Token[] = [];
230230

@@ -245,19 +245,22 @@ export class Tokenizer {
245245
currentAttr: Attribute = { name: '', value: '' };
246246

247247
private addLocationInfo;
248+
private onParseError;
248249

249-
constructor(options: { sourceCodeLocationInfo?: boolean }) {
250+
constructor(options: { sourceCodeLocationInfo?: boolean; onParseError?: ParserErrorHandler | null }) {
250251
this.addLocationInfo = !!options.sourceCodeLocationInfo;
252+
this.onParseError = options.onParseError ?? null;
253+
this.preprocessor = new Preprocessor(options);
251254
}
252255

253256
//Errors
254-
_err(_err: string) {
255-
// NOTE: err reporting is noop by default. Enabled by mixin.
257+
private _err(code: ERR) {
258+
this.onParseError?.(this.preprocessor.getError(code));
256259
}
257260

258-
private _errOnNextCodePoint(err: string) {
261+
private _errOnNextCodePoint(code: ERR) {
259262
this._consume();
260-
this._err(err);
263+
this._err(code);
261264
this._unconsume();
262265
}
263266

packages/parse5/lib/tokenizer/preprocessor.ts

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import * as unicode from '../common/unicode.js';
2-
import { ERR } from '../common/error-codes.js';
2+
import { ERR, ParserError, ParserErrorHandler } from '../common/error-codes.js';
33

44
//Aliases
55
const $ = unicode.CODE_POINTS;
@@ -25,6 +25,12 @@ export class Preprocessor {
2525
droppedBufferSize = 0;
2626
line = 1;
2727

28+
onParseError: ParserErrorHandler | null;
29+
30+
constructor(options: { onParseError?: ParserErrorHandler | null }) {
31+
this.onParseError = options.onParseError ?? null;
32+
}
33+
2834
/** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
2935
get col(): number {
3036
return this.pos - this.lineStartPos + Number(this.lastGapPos !== this.pos);
@@ -34,8 +40,27 @@ export class Preprocessor {
3440
return this.droppedBufferSize + this.pos;
3541
}
3642

37-
_err(_err: string) {
38-
// NOTE: err reporting is noop by default. Enabled by mixin.
43+
public getError(code: ERR): ParserError {
44+
const { line, col, offset } = this;
45+
46+
return {
47+
code,
48+
startLine: line,
49+
endLine: line,
50+
startCol: col,
51+
endCol: col,
52+
startOffset: offset,
53+
endOffset: offset,
54+
};
55+
}
56+
57+
//NOTE: avoid reporting error twice on advance/retreat
58+
private lastErrOffset = -1;
59+
private _err(code: ERR) {
60+
if (this.onParseError && this.lastErrOffset !== this.offset) {
61+
this.lastErrOffset = this.offset;
62+
this.onParseError(this.getError(code));
63+
}
3964
}
4065

4166
private _addGap() {
@@ -147,7 +172,8 @@ export class Preprocessor {
147172
//range (ASCII alphanumeric, whitespaces, big chunk of BMP)
148173
//before going into detailed performance cost validation.
149174
const isCommonValidRange =
150-
(cp > 0x1f && cp < 0x7f) || cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN || (cp > 0x9f && cp < 0xfdd0);
175+
this.onParseError !== null &&
176+
((cp > 0x1f && cp < 0x7f) || cp === $.LINE_FEED || cp === $.CARRIAGE_RETURN || (cp > 0x9f && cp < 0xfdd0));
151177

152178
if (!isCommonValidRange) {
153179
this._checkForProblematicCharacters(cp);

test/utils/generate-parsing-tests.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
import { ParserOptions } from './../../packages/parse5/lib/parser/index';
2-
import { ParserError } from './../../packages/parse5/lib/extensions/error-reporting/mixin-base';
1+
import { ParserOptions } from './../../packages/parse5/lib/parser/index.js';
2+
import { ParserError } from './../../packages/parse5/lib/common/error-codes.js';
33
import * as fs from 'fs';
44
import * as path from 'path';
55
import * as assert from 'assert';

0 commit comments

Comments
 (0)