Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Commit f221b84

Browse files
liggittniemeyer
authored andcommitted
Improve heuristics preventing CPU/memory abuse (#515)
This addresses the following items: ==== Parse time of excessively deep nested or indented documents Parsing these documents is non-linear; limiting stack depth to 10,000 keeps parse times of pathological documents sub-second (~.25 seconds in benchmarks) ==== Alias node expansion limits The current limit allows 10,000% expansion, which is too permissive for large documents. Limiting to 10% expansion for larger documents allows callers to use input size as an effective way to limit resource usage. Continuing to allow larger expansion rates (up to the current 10,000% limit) for smaller documents does not unduly affect memory use. This change bounds decode operations from alias expansion to ~400,000 operations for small documents (worst-case ~100-150MB) or 10% of the input document for large documents, whichever is greater.
1 parent bb4e33b commit f221b84

3 files changed

Lines changed: 165 additions & 1 deletion

File tree

benchmark_test.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package yaml_test
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
. "gopkg.in/check.v1"
8+
"gopkg.in/yaml.v2"
9+
)
10+
11+
type testcase struct {
12+
name string
13+
data []byte
14+
error string
15+
}
16+
17+
func testcases() []testcase {
18+
return []testcase{
19+
{
20+
name: "1000kb of maps with 100 aliases",
21+
data: []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`),
22+
error: "yaml: document contains excessive aliasing",
23+
},
24+
{
25+
name: "1000kb of deeply nested slices",
26+
data: []byte(strings.Repeat(`[`, 1000*1024)),
27+
error: "yaml: exceeded max depth of 10000",
28+
},
29+
{
30+
name: "1000kb of deeply nested maps",
31+
data: []byte("x: " + strings.Repeat(`{`, 1000*1024)),
32+
error: "yaml: exceeded max depth of 10000",
33+
},
34+
{
35+
name: "1000kb of deeply nested indents",
36+
data: []byte(strings.Repeat(`- `, 1000*1024)),
37+
error: "yaml: exceeded max depth of 10000",
38+
},
39+
{
40+
name: "1000kb of 1000-indent lines",
41+
data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)),
42+
},
43+
{name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)},
44+
{name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)},
45+
{name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)},
46+
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
47+
}
48+
}
49+
50+
func (s *S) TestLimits(c *C) {
51+
if testing.Short() {
52+
return
53+
}
54+
for _, tc := range testcases() {
55+
var v interface{}
56+
err := yaml.Unmarshal(tc.data, &v)
57+
if len(tc.error) > 0 {
58+
c.Assert(err, ErrorMatches, tc.error, Commentf("testcase: %s", tc.name))
59+
} else {
60+
c.Assert(err, IsNil, Commentf("testcase: %s", tc.name))
61+
}
62+
}
63+
}
64+
65+
func Benchmark1000KB100Aliases(b *testing.B) {
66+
benchmark(b, "1000kb of maps with 100 aliases")
67+
}
68+
func Benchmark1000KBDeeplyNestedSlices(b *testing.B) {
69+
benchmark(b, "1000kb of deeply nested slices")
70+
}
71+
func Benchmark1000KBDeeplyNestedMaps(b *testing.B) {
72+
benchmark(b, "1000kb of deeply nested maps")
73+
}
74+
func Benchmark1000KBDeeplyNestedIndents(b *testing.B) {
75+
benchmark(b, "1000kb of deeply nested indents")
76+
}
77+
func Benchmark1000KB1000IndentLines(b *testing.B) {
78+
benchmark(b, "1000kb of 1000-indent lines")
79+
}
80+
func Benchmark1KBMaps(b *testing.B) {
81+
benchmark(b, "1kb of maps")
82+
}
83+
func Benchmark10KBMaps(b *testing.B) {
84+
benchmark(b, "10kb of maps")
85+
}
86+
func Benchmark100KBMaps(b *testing.B) {
87+
benchmark(b, "100kb of maps")
88+
}
89+
func Benchmark1000KBMaps(b *testing.B) {
90+
benchmark(b, "1000kb of maps")
91+
}
92+
93+
func benchmark(b *testing.B, name string) {
94+
var tc testcase
95+
for _, t := range testcases() {
96+
if t.name == name {
97+
tc = t
98+
break
99+
}
100+
}
101+
if tc.name != name {
102+
b.Errorf("testcase %q not found", name)
103+
return
104+
}
105+
106+
b.ResetTimer()
107+
108+
for i := 0; i < b.N; i++ {
109+
var v interface{}
110+
err := yaml.Unmarshal(tc.data, &v)
111+
if len(tc.error) > 0 {
112+
if err == nil {
113+
b.Errorf("expected error, got none")
114+
} else if err.Error() != tc.error {
115+
b.Errorf("expected error '%s', got '%s'", tc.error, err.Error())
116+
}
117+
} else {
118+
if err != nil {
119+
b.Errorf("unexpected error: %v", err)
120+
}
121+
}
122+
}
123+
}

decode.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,37 @@ func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unm
318318
return out, false, false
319319
}
320320

321+
const (
322+
// 400,000 decode operations is ~500kb of dense object declarations, or ~5kb of dense object declarations with 10000% alias expansion
323+
alias_ratio_range_low = 400000
324+
// 4,000,000 decode operations is ~5MB of dense object declarations, or ~4.5MB of dense object declarations with 10% alias expansion
325+
alias_ratio_range_high = 4000000
326+
// alias_ratio_range is the range over which we scale allowed alias ratios
327+
alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low)
328+
)
329+
330+
func allowedAliasRatio(decodeCount int) float64 {
331+
switch {
332+
case decodeCount <= alias_ratio_range_low:
333+
// allow 99% to come from alias expansion for small-to-medium documents
334+
return 0.99
335+
case decodeCount >= alias_ratio_range_high:
336+
// allow 10% to come from alias expansion for very large documents
337+
return 0.10
338+
default:
339+
// scale smoothly from 99% down to 10% over the range.
340+
// this maps to 396,000 - 400,000 allowed alias-driven decodes over the range.
341+
// 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps).
342+
return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range)
343+
}
344+
}
345+
321346
func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
322347
d.decodeCount++
323348
if d.aliasDepth > 0 {
324349
d.aliasCount++
325350
}
326-
if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > 0.99 {
351+
if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) {
327352
failf("document contains excessive aliasing")
328353
}
329354
switch n.kind {

scannerc.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -906,13 +906,21 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
906906
return true
907907
}
908908

909+
// max_flow_level limits the flow_level
910+
const max_flow_level = 10000
911+
909912
// Increase the flow level and resize the simple key list if needed.
910913
func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
911914
// Reset the simple key on the next level.
912915
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
913916

914917
// Increase the flow level.
915918
parser.flow_level++
919+
if parser.flow_level > max_flow_level {
920+
return yaml_parser_set_scanner_error(parser,
921+
"while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
922+
fmt.Sprintf("exceeded max depth of %d", max_flow_level))
923+
}
916924
return true
917925
}
918926

@@ -925,6 +933,9 @@ func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
925933
return true
926934
}
927935

936+
// max_indents limits the indents stack size
937+
const max_indents = 10000
938+
928939
// Push the current indentation level to the stack and set the new level
929940
// the current column is greater than the indentation level. In this case,
930941
// append or insert the specified token into the token queue.
@@ -939,6 +950,11 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml
939950
// indentation level.
940951
parser.indents = append(parser.indents, parser.indent)
941952
parser.indent = column
953+
if len(parser.indents) > max_indents {
954+
return yaml_parser_set_scanner_error(parser,
955+
"while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
956+
fmt.Sprintf("exceeded max depth of %d", max_indents))
957+
}
942958

943959
// Create a token and insert it into the queue.
944960
token := yaml_token_t{

0 commit comments

Comments
 (0)