Skip to content

Commit f36a3ec

Browse files
pelletier and claude authored
Reduce marshal and unmarshal overhead (#1044)
* Reduce marshal and unmarshal overhead

Targeted optimizations to reduce performance overhead introduced by recent feature additions and the removal of unsafe.

Unmarshal:
- parseKeyval: access the node directly in the builder's slice to set Raw, bypassing NodeAt, which triggers a GC write barrier for the nodes-pointer on every key-value expression.
- Iterator.Next: cache the *nodes slice dereference in a local variable to avoid repeated pointer-to-slice indirection in the hot loop.

Marshal:
- Guard shouldOmitZero calls with an inlineable options.omitzero check. shouldOmitZero has inlining cost 1145 (budget 80), so avoiding the function call when omitzero is not set removes per-field overhead.
- Inline the isNil check in encodeMap. isNil has inlining cost 93 (budget 80), so expanding it at the single hot call site avoids per-map-entry function call overhead.

Update README benchmarks.

Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
1 parent 77f3862 commit f36a3ec

4 files changed

Lines changed: 50 additions & 42 deletions

File tree

README.md

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -235,40 +235,40 @@ the AST level. See https://pkg.go.dev/github.com/pelletier/go-toml/v2/unstable.
235235
Execution time speedup compared to other Go TOML libraries:
236236

237237
<table>
238-
<thead>
239-
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
240-
</thead>
241-
<tbody>
242-
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
243-
<tr><td>Marshal/ReferenceFile/map-2</td><td>1.9x</td><td>2.0x</td></tr>
244-
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
245-
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.4x</td><td>2.8x</td></tr>
246-
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>3.0x</td><td>3.0x</td></tr>
247-
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.9x</td><td>5.1x</td></tr>
248-
</tbody>
238+
<thead>
239+
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
240+
</thead>
241+
<tbody>
242+
<tr><td>Marshal/HugoFrontMatter-2</td><td>2.1x</td><td>2.0x</td></tr>
243+
<tr><td>Marshal/ReferenceFile/map-2</td><td>2.0x</td><td>2.0x</td></tr>
244+
<tr><td>Marshal/ReferenceFile/struct-2</td><td>2.3x</td><td>2.5x</td></tr>
245+
<tr><td>Unmarshal/HugoFrontMatter-2</td><td>3.3x</td><td>2.8x</td></tr>
246+
<tr><td>Unmarshal/ReferenceFile/map-2</td><td>2.9x</td><td>3.0x</td></tr>
247+
<tr><td>Unmarshal/ReferenceFile/struct-2</td><td>4.8x</td><td>5.0x</td></tr>
248+
</tbody>
249249
</table>
250250
<details><summary>See more</summary>
251251
<p>The table above has the results of the most common use-cases. The table below
252252
contains the results of all benchmarks, including unrealistic ones. It is
253253
provided for completeness.</p>
254254

255255
<table>
256-
<thead>
257-
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
258-
</thead>
259-
<tbody>
260-
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
261-
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.5x</td></tr>
262-
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.3x</td><td>3.5x</td></tr>
263-
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.5x</td></tr>
264-
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
265-
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.9x</td></tr>
266-
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
267-
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.1x</td><td>2.1x</td></tr>
268-
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
269-
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.1x</td></tr>
270-
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
271-
</tbody>
256+
<thead>
257+
<tr><th>Benchmark</th><th>go-toml v1</th><th>BurntSushi/toml</th></tr>
258+
</thead>
259+
<tbody>
260+
<tr><td>Marshal/SimpleDocument/map-2</td><td>2.0x</td><td>2.9x</td></tr>
261+
<tr><td>Marshal/SimpleDocument/struct-2</td><td>2.5x</td><td>3.6x</td></tr>
262+
<tr><td>Unmarshal/SimpleDocument/map-2</td><td>4.2x</td><td>3.4x</td></tr>
263+
<tr><td>Unmarshal/SimpleDocument/struct-2</td><td>5.9x</td><td>4.4x</td></tr>
264+
<tr><td>UnmarshalDataset/example-2</td><td>3.2x</td><td>2.9x</td></tr>
265+
<tr><td>UnmarshalDataset/code-2</td><td>2.4x</td><td>2.8x</td></tr>
266+
<tr><td>UnmarshalDataset/twitter-2</td><td>2.7x</td><td>2.5x</td></tr>
267+
<tr><td>UnmarshalDataset/citm_catalog-2</td><td>2.3x</td><td>2.3x</td></tr>
268+
<tr><td>UnmarshalDataset/canada-2</td><td>1.9x</td><td>1.5x</td></tr>
269+
<tr><td>UnmarshalDataset/config-2</td><td>5.4x</td><td>3.0x</td></tr>
270+
<tr><td>geomean</td><td>2.9x</td><td>2.8x</td></tr>
271+
</tbody>
272272
</table>
273273
<p>This table can be generated with <code>./ci.sh benchmark -a -html</code>.</p>
274274
</details>

marshaler.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -704,15 +704,18 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte
704704
for iter.Next() {
705705
v := iter.Value()
706706

707-
if isNil(v) {
708-
// For nil pointers, convert to zero value of the element type.
709-
// This allows round-trip marshaling of maps with nil pointer values.
710-
// For nil interfaces and nil maps, skip since we can't derive a type.
711-
if v.Kind() == reflect.Ptr {
707+
// Handle nil values: convert nil pointers to zero value,
708+
// skip nil interfaces and nil maps.
709+
switch v.Kind() {
710+
case reflect.Ptr:
711+
if v.IsNil() {
712712
v = reflect.Zero(v.Type().Elem())
713-
} else {
713+
}
714+
case reflect.Interface, reflect.Map:
715+
if v.IsNil() {
714716
continue
715717
}
718+
default:
716719
}
717720

718721
k, err := enc.keyToString(iter.Key())
@@ -936,7 +939,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
936939
if shouldOmitEmpty(kv.Options, kv.Value) {
937940
continue
938941
}
939-
if shouldOmitZero(kv.Options, kv.Value) {
942+
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
940943
continue
941944
}
942945
hasNonEmptyKV = true
@@ -958,7 +961,7 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro
958961
if shouldOmitEmpty(table.Options, table.Value) {
959962
continue
960963
}
961-
if shouldOmitZero(table.Options, table.Value) {
964+
if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) {
962965
continue
963966
}
964967
if first {
@@ -995,7 +998,7 @@ func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte
995998
if shouldOmitEmpty(kv.Options, kv.Value) {
996999
continue
9971000
}
998-
if shouldOmitZero(kv.Options, kv.Value) {
1001+
if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) {
9991002
continue
10001003
}
10011004

unstable/ast.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,16 @@ func (c *Iterator) Next() bool {
2828
if c.nodes == nil {
2929
return false
3030
}
31+
nodes := *c.nodes
3132
if !c.started {
3233
c.started = true
33-
} else if c.idx >= 0 {
34-
c.idx = (*c.nodes)[c.idx].next
34+
} else {
35+
idx := c.idx
36+
if idx >= 0 && int(idx) < len(nodes) {
37+
c.idx = nodes[idx].next
38+
}
3539
}
36-
return c.idx >= 0 && int(c.idx) < len(*c.nodes)
40+
return c.idx >= 0 && int(c.idx) < len(nodes)
3741
}
3842

3943
// IsLast returns true if the current node of the iterator is the last

unstable/parser.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
363363
p.builder.Chain(valRef, key)
364364
p.builder.AttachChild(ref, valRef)
365365

366-
// Set Raw to span the entire key-value expression
367-
node := p.builder.NodeAt(ref)
368-
node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
366+
// Set Raw to span the entire key-value expression.
367+
// Access the node directly in the slice to avoid the write barrier
368+
// that NodeAt's nodes-pointer setup would trigger.
369+
p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b)
369370

370371
return ref, b, err
371372
}

0 commit comments

Comments
 (0)