Skip to content

Commit b4b96f2

Browse files
committed
Fix TOML table parsing after standalone comments
Standalone TOML comments immediately inside a table/array-table no longer end the table scope, preventing subsequent keys from being flattened to the document root.
1 parent 2824d66 commit b4b96f2

2 files changed

Lines changed: 114 additions & 20 deletions

File tree

pkg/yqlib/decoder_toml.go

Lines changed: 90 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -329,20 +329,51 @@ func (dec *tomlDecoder) processTable(currentNode *toml.Node) (bool, error) {
329329

330330
var tableValue *toml.Node
331331
runAgainstCurrentExp := false
332-
hasValue := dec.parser.NextExpression()
333-
// check to see if there is any table data
334-
if hasValue {
332+
sawKeyValue := false
333+
for dec.parser.NextExpression() {
335334
tableValue = dec.parser.Expression()
336-
// next expression is not table data, so we are done
335+
// Allow standalone comments inside the table before the first key-value.
336+
// These should be associated with the next element in the table (usually the first key-value),
337+
// not treated as "end of table" (which would cause subsequent key-values to be parsed at root).
338+
if tableValue.Kind == toml.Comment {
339+
dec.pendingComments = append(dec.pendingComments, string(tableValue.Data))
340+
continue
341+
}
342+
343+
// next expression is not table data, so we are done (but we need to re-process it at top-level)
337344
if tableValue.Kind != toml.KeyValue {
338-
log.Debug("got an empty table")
345+
log.Debug("got an empty table (or reached next section)")
346+
// If the table had only comments, attach them to the table itself so they don't leak to the next node.
347+
if !sawKeyValue && len(dec.pendingComments) > 0 {
348+
comments := strings.Join(dec.pendingComments, "\n")
349+
if tableNodeValue.HeadComment == "" {
350+
tableNodeValue.HeadComment = comments
351+
} else {
352+
tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments
353+
}
354+
dec.pendingComments = make([]string, 0)
355+
}
339356
runAgainstCurrentExp = true
357+
break
358+
}
359+
360+
sawKeyValue = true
361+
runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue)
362+
if err != nil && !errors.Is(err, io.EOF) {
363+
return false, err
364+
}
365+
break
366+
}
367+
// If we hit EOF after only seeing comments inside this table, attach them to the table itself
368+
// so they don't leak to whatever comes next.
369+
if !sawKeyValue && len(dec.pendingComments) > 0 {
370+
comments := strings.Join(dec.pendingComments, "\n")
371+
if tableNodeValue.HeadComment == "" {
372+
tableNodeValue.HeadComment = comments
340373
} else {
341-
runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue)
342-
if err != nil && !errors.Is(err, io.EOF) {
343-
return false, err
344-
}
374+
tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments
345375
}
376+
dec.pendingComments = make([]string, 0)
346377
}
347378

348379
err = dec.d.DeeplyAssign(c, fullPath, tableNodeValue)
@@ -405,19 +436,58 @@ func (dec *tomlDecoder) processArrayTable(currentNode *toml.Node) (bool, error)
405436
}
406437

407438
runAgainstCurrentExp := false
408-
// if the next value is a ArrayTable or Table, then its not part of this declaration (not a key value pair)
409-
// so lets leave that expression for the next round of parsing
410-
if hasValue && (dec.parser.Expression().Kind == toml.ArrayTable || dec.parser.Expression().Kind == toml.Table) {
411-
runAgainstCurrentExp = true
412-
} else if hasValue {
413-
// otherwise, if there is a value, it must be some key value pairs of the
414-
// first object in the array!
415-
tableValue := dec.parser.Expression()
416-
runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, tableValue)
417-
if err != nil && !errors.Is(err, io.EOF) {
418-
return false, err
439+
sawKeyValue := false
440+
if hasValue {
441+
for {
442+
exp := dec.parser.Expression()
443+
// Allow standalone comments inside array tables before the first key-value.
444+
if exp.Kind == toml.Comment {
445+
dec.pendingComments = append(dec.pendingComments, string(exp.Data))
446+
hasValue = dec.parser.NextExpression()
447+
if !hasValue {
448+
break
449+
}
450+
continue
451+
}
452+
453+
// If the next value is an ArrayTable or Table, then it's not part of this declaration (not a key-value pair),
454+
// so let's leave that expression for the next round of parsing.
455+
if exp.Kind == toml.ArrayTable || exp.Kind == toml.Table {
456+
// If this array-table entry had only comments, attach them to the entry so they don't leak.
457+
if !sawKeyValue && len(dec.pendingComments) > 0 {
458+
comments := strings.Join(dec.pendingComments, "\n")
459+
if tableNodeValue.HeadComment == "" {
460+
tableNodeValue.HeadComment = comments
461+
} else {
462+
tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments
463+
}
464+
dec.pendingComments = make([]string, 0)
465+
}
466+
runAgainstCurrentExp = true
467+
break
468+
}
469+
470+
sawKeyValue = true
471+
// otherwise, if there is a value, it must be some key value pairs of the
472+
// first object in the array!
473+
runAgainstCurrentExp, err = dec.decodeKeyValuesIntoMap(tableNodeValue, exp)
474+
if err != nil && !errors.Is(err, io.EOF) {
475+
return false, err
476+
}
477+
break
419478
}
420479
}
480+
// If we hit EOF after only seeing comments inside this array-table entry, attach them to the entry
481+
// so they don't leak to whatever comes next.
482+
if !sawKeyValue && len(dec.pendingComments) > 0 {
483+
comments := strings.Join(dec.pendingComments, "\n")
484+
if tableNodeValue.HeadComment == "" {
485+
tableNodeValue.HeadComment = comments
486+
} else {
487+
tableNodeValue.HeadComment = tableNodeValue.HeadComment + "\n" + comments
488+
}
489+
dec.pendingComments = make([]string, 0)
490+
}
421491

422492
// += function
423493
err = dec.arrayAppend(c, fullPath, tableNodeValue)

pkg/yqlib/toml_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,14 @@ B = 12
228228
name = "Tom" # name comment
229229
`
230230

231+
// Repro for https://github.com/mikefarah/yq/issues/2588
232+
// Bug: standalone comments inside a table cause subsequent key-values to be assigned at root.
233+
var issue2588RustToolchainWithComments = `
234+
[owner]
235+
# comment
236+
name = "Tomer"
237+
`
238+
231239
var sampleFromWeb = `# This is a TOML document
232240
title = "TOML Example"
233241
@@ -550,6 +558,22 @@ var tomlScenarios = []formatScenario{
550558
expected: rtComments,
551559
scenarioType: "roundtrip",
552560
},
561+
{
562+
skipDoc: true,
563+
description: "Issue #2588: comments inside table must not flatten (.owner.name)",
564+
input: issue2588RustToolchainWithComments,
565+
expression: ".owner.name",
566+
expected: "Tomer\n",
567+
scenarioType: "decode",
568+
},
569+
{
570+
skipDoc: true,
571+
description: "Issue #2588: comments inside table must not flatten (.name)",
572+
input: issue2588RustToolchainWithComments,
573+
expression: ".name",
574+
expected: "null\n",
575+
scenarioType: "decode",
576+
},
553577
{
554578
description: "Roundtrip: sample from web",
555579
input: sampleFromWeb,

0 commit comments

Comments
 (0)