@@ -2,14 +2,13 @@ package codegen
2
2
3
3
import (
4
4
"bufio"
5
+ "encoding/xml"
5
6
"fmt"
6
7
"html"
7
8
"io"
9
+ "log"
8
10
"regexp"
9
11
"strings"
10
-
11
- xhtml "golang.org/x/net/html"
12
- "golang.org/x/net/html/atom"
13
12
)
14
13
15
14
// reNewline matches one line break: a line feed, optionally preceded by a
// carriage return.
var reNewline = regexp.MustCompile(`\r?\n`)
@@ -152,7 +151,11 @@ func getLeadingWhitespace(v string) string {
152
151
153
152
// generateDoc generates the doc string for HTML-encoded or plain-text doc entries.
154
153
func generateDoc (htmlSrc string ) string {
155
- tokenizer := xhtml .NewTokenizer (strings .NewReader (htmlSrc ))
154
+ tokenizer := xml .NewDecoder (strings .NewReader (htmlSrc ))
155
+ tokenizer .Strict = false
156
+ tokenizer .AutoClose = xml .HTMLAutoClose
157
+ tokenizer .Entity = xml .HTMLEntity
158
+
156
159
var builder strings.Builder
157
160
if err := encodeHTMLToText (& builder , tokenizer ); err != nil {
158
161
panic (fmt .Sprintf ("failed to generated docs, %v" , err ))
@@ -168,31 +171,30 @@ type stringWriter interface {
168
171
WriteString (string ) (int , error )
169
172
}
170
173
171
- func encodeHTMLToText (w stringWriter , z * xhtml. Tokenizer ) error {
174
+ func encodeHTMLToText (w stringWriter , z * xml. Decoder ) error {
172
175
encoder := newHTMLTokenEncoder (w )
173
176
defer encoder .Flush ()
174
177
175
178
for {
176
- tt := z .Next ()
177
- if tt == xhtml .ErrorToken {
178
- if err := z .Err (); err == io .EOF {
179
- return nil
180
- } else if err != nil {
181
- return err
182
- }
179
+ tt , err := z .Token ()
180
+ if err == io .EOF {
181
+ return nil
182
+ }
183
+ if err != nil {
184
+ return err
183
185
}
184
186
185
- if err := encoder .Encode (z . Token () ); err != nil {
187
+ if err := encoder .Encode (tt ); err != nil {
186
188
return err
187
189
}
188
190
}
189
191
}
190
192
191
193
// htmlTokenHandler receives the tokens that the HTML token encoder decodes
// from a doc string.
type htmlTokenHandler interface {
	// OnStartTagToken is called for an opening tag. When the result is
	// non-nil the encoder pushes it as the handler for the tokens nested
	// inside that tag.
	OnStartTagToken(xml.StartElement) htmlTokenHandler
	// OnEndTagToken is called for a closing tag. The bool reports whether the
	// tag closes the block this handler was installed for.
	OnEndTagToken(xml.Token, bool)
	// OnSelfClosingTagToken is part of the handler contract.
	// NOTE(review): no call site is visible in this part of the file — confirm
	// whether it is still needed.
	OnSelfClosingTagToken(xml.Token)
	// OnTextTagToken is called for character data.
	OnTextTagToken(xml.CharData)
}
197
199
198
200
type htmlTokenEncoder struct {
@@ -220,44 +222,45 @@ func newHTMLTokenEncoder(w stringWriter) *htmlTokenEncoder {
220
222
}
221
223
222
224
// Flush delivers a final, block-closing end event carrying empty character
// data to the base handler. It always returns nil.
func (e *htmlTokenEncoder) Flush() error {
	e.baseHandler.handler.OnEndTagToken(xml.CharData([]byte{}), true)
	return nil
}
226
228
227
- func (e * htmlTokenEncoder ) Encode (token xhtml .Token ) error {
229
+ func (e * htmlTokenEncoder ) Encode (token xml .Token ) error {
228
230
h := e .baseHandler
229
231
if len (e .handlers ) != 0 {
230
232
h = e .handlers [len (e .handlers )- 1 ]
231
233
}
232
234
233
- switch token .Type {
234
- case xhtml . StartTagToken :
235
+ switch v := token .( type ) {
236
+ case xml. StartElement :
235
237
e .depth ++
236
238
237
- next := h .handler .OnStartTagToken (token )
239
+ next := h .handler .OnStartTagToken (v )
238
240
if next != nil {
239
241
e .handlers = append (e .handlers , tokenHandlerItem {
240
242
handler : next ,
241
243
depth : e .depth ,
242
244
})
243
245
}
244
246
245
- case xhtml . EndTagToken :
247
+ case xml. EndElement :
246
248
handlerBlockClosing := e .depth == h .depth
247
249
248
250
h .handler .OnEndTagToken (token , handlerBlockClosing )
249
251
250
252
// Remove all but the root handler as the handler is no longer needed.
251
- if handlerBlockClosing {
253
+ if handlerBlockClosing && len ( e . handlers ) != 0 {
252
254
e .handlers = e .handlers [:len (e .handlers )- 1 ]
253
255
}
254
256
e .depth --
257
+ if e .depth < 0 {
258
+ log .Printf ("ignoring unexpected closing tag, %v" , token )
259
+ e .depth = 0
260
+ }
255
261
256
- case xhtml .SelfClosingTagToken :
257
- h .handler .OnSelfClosingTagToken (token )
258
-
259
- case xhtml .TextToken :
260
- h .handler .OnTextTagToken (token )
262
+ case xml.CharData :
263
+ h .handler .OnTextTagToken (v )
261
264
}
262
265
263
266
return nil
@@ -267,11 +270,11 @@ type baseTokenHandler struct {
267
270
w stringWriter
268
271
}
269
272
270
- func (e * baseTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler { return nil }
271
- func (e * baseTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {}
272
- func (e * baseTokenHandler ) OnSelfClosingTagToken (token xhtml .Token ) {}
273
- func (e * baseTokenHandler ) OnTextTagToken (token xhtml. Token ) {
274
- e .w .WriteString (token . Data )
273
+ func (e * baseTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler { return nil }
274
+ func (e * baseTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {}
275
+ func (e * baseTokenHandler ) OnSelfClosingTagToken (token xml .Token ) {}
276
+ func (e * baseTokenHandler ) OnTextTagToken (token xml. CharData ) {
277
+ e .w .WriteString (string ( token ) )
275
278
}
276
279
277
280
type blockTokenHandler struct {
@@ -295,27 +298,27 @@ func newBlockTokenHandler(w stringWriter) *blockTokenHandler {
295
298
},
296
299
}
297
300
}
298
- func (e * blockTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
301
+ func (e * blockTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
299
302
e .started = true
300
303
if e .newlineBeforeNextBlock {
301
304
e .w .WriteString ("\n " )
302
305
e .newlineBeforeNextBlock = false
303
306
}
304
307
305
- switch token .DataAtom {
306
- case atom . A :
308
+ switch token .Name . Local {
309
+ case "a" :
307
310
return newLinkTokenHandler (e .w , token )
308
- case atom . Ul :
311
+ case "ul" :
309
312
e .w .WriteString ("\n " )
310
313
e .newlineBeforeNextBlock = true
311
314
return newListTokenHandler (e .w )
312
315
313
- case atom . Div , atom . Dt , atom . P , atom . H1 , atom . H2 , atom . H3 , atom . H4 , atom . H5 , atom . H6 :
316
+ case "div" , "dt" , "p" , "h1" , "h2" , "h3" , "h4" , "h5" , "h6" :
314
317
e .w .WriteString ("\n " )
315
318
e .newlineBeforeNextBlock = true
316
319
return newBlockTokenHandler (e .w )
317
320
318
- case atom . Pre , atom . Code :
321
+ case "pre" , "code" :
319
322
if e .rootBlock {
320
323
e .w .WriteString ("\n " )
321
324
e .w .WriteString (indent )
@@ -326,7 +329,7 @@ func (e *blockTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler
326
329
327
330
return nil
328
331
}
329
- func (e * blockTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
332
+ func (e * blockTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
330
333
if ! blockClosing {
331
334
return
332
335
}
@@ -340,34 +343,34 @@ func (e *blockTokenHandler) OnEndTagToken(token xhtml.Token, blockClosing bool)
340
343
e .strBuilder .Reset ()
341
344
}
342
345
343
- func (e * blockTokenHandler ) OnTextTagToken (token xhtml. Token ) {
346
+ func (e * blockTokenHandler ) OnTextTagToken (token xml. CharData ) {
344
347
if e .newlineBeforeNextBlock {
345
348
e .w .WriteString ("\n " )
346
349
e .newlineBeforeNextBlock = false
347
350
}
348
351
if ! e .started {
349
- token . Data = strings .TrimLeft (token . Data , " \t \n " )
352
+ token = xml . CharData ( strings .TrimLeft (string ( token ) , " \t \n " ) )
350
353
}
351
- if len (token . Data ) != 0 {
354
+ if len (token ) != 0 {
352
355
e .started = true
353
356
}
354
357
e .baseTokenHandler .OnTextTagToken (token )
355
358
}
356
359
357
360
type linkTokenHandler struct {
358
361
baseTokenHandler
359
- linkToken xhtml. Token
362
+ linkToken xml. StartElement
360
363
}
361
364
362
- func newLinkTokenHandler (w stringWriter , token xhtml. Token ) * linkTokenHandler {
365
+ func newLinkTokenHandler (w stringWriter , token xml. StartElement ) * linkTokenHandler {
363
366
return & linkTokenHandler {
364
367
baseTokenHandler : baseTokenHandler {
365
368
w : w ,
366
369
},
367
370
linkToken : token ,
368
371
}
369
372
}
370
- func (e * linkTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
373
+ func (e * linkTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
371
374
if ! blockClosing {
372
375
return
373
376
}
@@ -390,9 +393,9 @@ func newListTokenHandler(w stringWriter) *listTokenHandler {
390
393
},
391
394
}
392
395
}
393
- func (e * listTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
394
- switch token .DataAtom {
395
- case atom . Li :
396
+ func (e * listTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
397
+ switch token .Name . Local {
398
+ case "li" :
396
399
if e .items >= 1 {
397
400
e .w .WriteString ("\n \n " )
398
401
}
@@ -402,7 +405,7 @@ func (e *listTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler {
402
405
return nil
403
406
}
404
407
405
- func (e * listTokenHandler ) OnTextTagToken (token xhtml. Token ) {
408
+ func (e * listTokenHandler ) OnTextTagToken (token xml. CharData ) {
406
409
// Squash whitespace between list and items
407
410
}
408
411
@@ -423,14 +426,14 @@ func newListItemTokenHandler(w stringWriter) *listItemTokenHandler {
423
426
},
424
427
}
425
428
}
426
- func (e * listItemTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
427
- switch token .DataAtom {
428
- case atom . P :
429
+ func (e * listItemTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
430
+ switch token .Name . Local {
431
+ case "p" :
429
432
return newBlockTokenHandler (e .w )
430
433
}
431
434
return nil
432
435
}
433
- func (e * listItemTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
436
+ func (e * listItemTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
434
437
if ! blockClosing {
435
438
return
436
439
}
@@ -456,18 +459,18 @@ func newTrimSpaceTokenHandler(w stringWriter) *trimSpaceTokenHandler {
456
459
},
457
460
}
458
461
}
459
- func (e * trimSpaceTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
462
+ func (e * trimSpaceTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
460
463
if ! blockClosing {
461
464
return
462
465
}
463
466
464
467
e .origWriter .WriteString (strings .TrimSpace (e .strBuilder .String ()))
465
468
}
466
469
467
- func getHTMLTokenAttr (attr []xhtml. Attribute , name string ) (string , bool ) {
470
+ func getHTMLTokenAttr (attr []xml. Attr , name string ) (string , bool ) {
468
471
for _ , a := range attr {
469
- if strings .EqualFold (a .Key , name ) {
470
- return a .Val , true
472
+ if strings .EqualFold (a .Name . Local , name ) {
473
+ return a .Value , true
471
474
}
472
475
}
473
476
return "" , false
0 commit comments