Skip to content

Commit 50048bb

Browse files
committed
HTML: support unofficial XML tag
1 parent 106dd22 commit 50048bb

File tree

3 files changed

+58
-31
lines changed

3 files changed

+58
-31
lines changed

html/hash.go

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,61 @@
11
package html
22

3-
// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
4-
53
// uses github.com/tdewolff/hasher
64
//go:generate hasher -type=Hash -file=hash.go
75

86
// Hash defines perfect hashes for a predefined list of strings
97
type Hash uint32
108

11-
// Unique hash definitions to be used instead of strings
9+
// Identifiers for the hashes associated with the text in the comments.
1210
const (
1311
Iframe Hash = 0x6 // iframe
1412
Math Hash = 0x604 // math
15-
Plaintext Hash = 0x1e09 // plaintext
13+
Plaintext Hash = 0x2109 // plaintext
1614
Script Hash = 0xa06 // script
1715
Style Hash = 0x1405 // style
1816
Svg Hash = 0x1903 // svg
19-
Textarea Hash = 0x2308 // textarea
17+
Textarea Hash = 0x2608 // textarea
2018
Title Hash = 0xf05 // title
21-
Xmp Hash = 0x1c03 // xmp
19+
Xml Hash = 0x1c03 // xml
20+
Xmp Hash = 0x1f03 // xmp
2221
)
2322

24-
// String returns the hash' name.
23+
//var HashMap = map[string]Hash{
24+
// "iframe": Iframe,
25+
// "math": Math,
26+
// "plaintext": Plaintext,
27+
// "script": Script,
28+
// "style": Style,
29+
// "svg": Svg,
30+
// "textarea": Textarea,
31+
// "title": Title,
32+
// "xml": Xml,
33+
// "xmp": Xmp,
34+
//}
35+
36+
// String returns the text associated with the hash.
2537
func (i Hash) String() string {
38+
return string(i.Bytes())
39+
}
40+
41+
// Bytes returns the text associated with the hash.
42+
func (i Hash) Bytes() []byte {
2643
start := uint32(i >> 8)
2744
n := uint32(i & 0xff)
2845
if start+n > uint32(len(_Hash_text)) {
29-
return ""
46+
return []byte{}
3047
}
3148
return _Hash_text[start : start+n]
3249
}
3350

34-
// ToHash returns the hash whose name is s. It returns zero if there is no
35-
// such hash. It is case sensitive.
51+
// ToHash returns the Hash for the given []byte. A Hash is a uint32 that is associated with the text in the []byte. It returns zero if no match is found.
3652
func ToHash(s []byte) Hash {
3753
if len(s) == 0 || len(s) > _Hash_maxLen {
3854
return 0
3955
}
56+
//if 3 < len(s) {
57+
// return HashMap[string(s)]
58+
//}
4059
h := uint32(_Hash_hash0)
4160
for i := 0; i < len(s); i++ {
4261
h ^= uint32(s[i])
@@ -64,18 +83,21 @@ NEXT:
6483
return 0
6584
}
6685

67-
const _Hash_hash0 = 0x9acb0442
86+
const _Hash_hash0 = 0xb4b790b3
6887
const _Hash_maxLen = 9
69-
const _Hash_text = "iframemathscriptitlestylesvgxmplaintextarea"
88+
89+
var _Hash_text = []byte("" +
90+
"iframemathscriptitlestylesvgxmlxmplaintextarea")
7091

7192
var _Hash_table = [1 << 4]Hash{
72-
0x0: 0x2308, // textarea
73-
0x2: 0x6, // iframe
74-
0x4: 0xf05, // title
75-
0x5: 0x1e09, // plaintext
76-
0x7: 0x1405, // style
77-
0x8: 0x604, // math
78-
0x9: 0xa06, // script
79-
0xa: 0x1903, // svg
80-
0xb: 0x1c03, // xmp
93+
0x2: 0xa06, // script
94+
0x3: 0xf05, // title
95+
0x4: 0x1405, // style
96+
0x5: 0x604, // math
97+
0x6: 0x6, // iframe
98+
0x8: 0x1c03, // xml
99+
0x9: 0x2608, // textarea
100+
0xc: 0x1f03, // xmp
101+
0xe: 0x2109, // plaintext
102+
0xf: 0x1903, // svg
81103
}

html/lex.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ const (
2121
EndTagToken
2222
AttributeToken
2323
TextToken
24-
SvgToken
24+
SVGToken
2525
MathToken
26+
XMLToken
2627
TemplateToken
2728
)
2829

@@ -47,10 +48,12 @@ func (tt TokenType) String() string {
4748
return "Attribute"
4849
case TextToken:
4950
return "Text"
50-
case SvgToken:
51-
return "Svg"
51+
case SVGToken:
52+
return "SVG"
5253
case MathToken:
5354
return "Math"
55+
case XMLToken:
56+
return "XML"
5457
case TemplateToken:
5558
return "Template"
5659
}
@@ -373,18 +376,20 @@ func (l *Lexer) shiftStartTag() (TokenType, []byte) {
373376
l.r.Move(1)
374377
}
375378
l.text = parse.ToLower(l.r.Lexeme()[1:])
376-
if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math {
377-
if h == Svg || h == Math {
379+
if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math || h == Xml {
380+
if h == Svg || h == Math || h == Xml {
378381
data := l.shiftXML(h)
379382
if l.err != nil {
380383
return ErrorToken, nil
381384
}
382385

383386
l.inTag = false
384387
if h == Svg {
385-
return SvgToken, data
388+
return SVGToken, data
389+
} else if h == Math {
390+
return MathToken, data
386391
}
387-
return MathToken, data
392+
return XMLToken, data
388393
}
389394
l.rawTag = h
390395
}

html/lex_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@ func TestTokens(t *testing.T) {
4242
{"<script><!--var x='<script></script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
4343
{"<script><!--var x='<script>';--></script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
4444
{"<![CDATA[ test ]]>", TTs{TextToken}},
45-
{"<svg>text</svg>", TTs{SvgToken}},
45+
{"<svg>text</svg>", TTs{SVGToken}},
4646
{"<math>text</math gibberish>", TTs{MathToken}},
47-
{`<svg>text<x a="</svg>"></x></svg>`, TTs{SvgToken}},
48-
{"<a><svg>text</svg></a>", TTs{StartTagToken, StartTagCloseToken, SvgToken, EndTagToken}},
47+
{`<svg>text<x a="</svg>"></x></svg>`, TTs{SVGToken}},
48+
{"<a><svg>text</svg></a>", TTs{StartTagToken, StartTagCloseToken, SVGToken, EndTagToken}},
4949

5050
// early endings
5151
{"<!-- comment", TTs{CommentToken}},

0 commit comments

Comments
 (0)