Skip to content

Commit bd0376d

Browse files
authored
UTF-8 support in validation, and some parsers and formatters (#537)
UTF-8 support in validation, and some parsers and formatters --------- Signed-off-by: Owen Williams <[email protected]>
1 parent 7e44242 commit bd0376d

13 files changed

+574
-150
lines changed

config/http_config.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import (
3030
"sync"
3131
"time"
3232

33-
"github.com/mwitkow/go-conntrack"
33+
conntrack "github.com/mwitkow/go-conntrack"
3434
"golang.org/x/net/http/httpproxy"
3535
"golang.org/x/net/http2"
3636
"golang.org/x/oauth2"

expfmt/decode_test.go

+49-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"bufio"
1818
"errors"
1919
"io"
20+
"math"
2021
"net/http"
2122
"reflect"
2223
"sort"
@@ -104,9 +105,10 @@ func TestProtoDecoder(t *testing.T) {
104105
testTime := model.Now()
105106

106107
scenarios := []struct {
107-
in string
108-
expected model.Vector
109-
fail bool
108+
in string
109+
expected model.Vector
110+
legacyNameFail bool
111+
fail bool
110112
}{
111113
{
112114
in: "",
@@ -332,6 +334,30 @@ func TestProtoDecoder(t *testing.T) {
332334
},
333335
},
334336
},
337+
{
338+
in: "\xa8\x01\n\ngauge.name\x12\x11gauge\ndoc\nstr\"ing\x18\x01\"T\n\x1b\n\x06name.1\x12\x11val with\nnew line\n*\n\x06name*2\x12 val with \\backslash and \"quotes\"\x12\t\t\x00\x00\x00\x00\x00\x00\xf0\x7f\"/\n\x10\n\x06name.1\x12\x06Björn\n\x10\n\x06name*2\x12\x06佖佥\x12\t\t\xd1\xcfD\xb9\xd0\x05\xc2H",
339+
legacyNameFail: true,
340+
expected: model.Vector{
341+
&model.Sample{
342+
Metric: model.Metric{
343+
model.MetricNameLabel: "gauge.name",
344+
"name.1": "val with\nnew line",
345+
"name*2": "val with \\backslash and \"quotes\"",
346+
},
347+
Value: model.SampleValue(math.Inf(+1)),
348+
Timestamp: testTime,
349+
},
350+
&model.Sample{
351+
Metric: model.Metric{
352+
model.MetricNameLabel: "gauge.name",
353+
"name.1": "Björn",
354+
"name*2": "佖佥",
355+
},
356+
Value: 3.14e42,
357+
Timestamp: testTime,
358+
},
359+
},
360+
},
335361
}
336362

337363
for i, scenario := range scenarios {
@@ -344,11 +370,31 @@ func TestProtoDecoder(t *testing.T) {
344370

345371
var all model.Vector
346372
for {
373+
model.NameValidationScheme = model.LegacyValidation
347374
var smpls model.Vector
348375
err := dec.Decode(&smpls)
349376
if err != nil && errors.Is(err, io.EOF) {
350377
break
351378
}
379+
if scenario.legacyNameFail {
380+
if err == nil {
381+
t.Fatal("Expected error when decoding without UTF-8 support enabled but got none")
382+
}
383+
model.NameValidationScheme = model.UTF8Validation
384+
dec = &SampleDecoder{
385+
Dec: &protoDecoder{r: strings.NewReader(scenario.in)},
386+
Opts: &DecodeOptions{
387+
Timestamp: testTime,
388+
},
389+
}
390+
err = dec.Decode(&smpls)
391+
if errors.Is(err, io.EOF) {
392+
break
393+
}
394+
if err != nil {
395+
t.Fatalf("Unexpected error when decoding with UTF-8 support: %v", err)
396+
}
397+
}
352398
if scenario.fail {
353399
if err == nil {
354400
t.Fatal("Expected error but got none")

expfmt/expfmt.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@ package expfmt
1717
// Format specifies the HTTP content type of the different wire protocols.
1818
type Format string
1919

20-
// Constants to assemble the Content-Type values for the different wire protocols.
20+
// Constants to assemble the Content-Type values for the different wire
21+
// protocols. The Content-Type strings here are all for the legacy exposition
22+
// formats, where valid characters for metric names and label names are limited.
23+
// Support for arbitrary UTF-8 characters in those names is already partially
24+
// implemented in this module (see model.ValidationScheme), but to actually use
25+
// it on the wire, new content-type strings will have to be agreed upon and
26+
// added here.
2127
const (
2228
TextVersion = "0.0.4"
2329
ProtoType = `application/vnd.google.protobuf`

expfmt/openmetrics_create.go

+58-27
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,18 @@ import (
3535
// sanity checks. If the input contains duplicate metrics or invalid metric or
3636
// label names, the conversion will result in invalid text format output.
3737
//
38+
// If metric names conform to the legacy validation pattern, they will be placed
39+
// outside the brackets in the traditional way, like `foo{}`. If the metric name
40+
// fails the legacy validation check, it will be placed quoted inside the
41+
// brackets: `{"foo"}`. As stated above, the input is assumed to be santized and
42+
// no error will be thrown in this case.
43+
//
44+
// Similar to metric names, if label names conform to the legacy validation
45+
// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label
46+
// name fails the legacy validation check, it will be quoted:
47+
// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and
48+
// no error will be thrown in this case.
49+
//
3850
// This function fulfills the type 'expfmt.encoder'.
3951
//
4052
// Note that OpenMetrics requires a final `# EOF` line. Since this function acts
@@ -98,7 +110,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
98110
if err != nil {
99111
return
100112
}
101-
n, err = w.WriteString(shortName)
113+
n, err = writeName(w, shortName)
102114
written += n
103115
if err != nil {
104116
return
@@ -124,7 +136,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int
124136
if err != nil {
125137
return
126138
}
127-
n, err = w.WriteString(shortName)
139+
n, err = writeName(w, shortName)
128140
written += n
129141
if err != nil {
130142
return
@@ -303,21 +315,9 @@ func writeOpenMetricsSample(
303315
floatValue float64, intValue uint64, useIntValue bool,
304316
exemplar *dto.Exemplar,
305317
) (int, error) {
306-
var written int
307-
n, err := w.WriteString(name)
308-
written += n
309-
if err != nil {
310-
return written, err
311-
}
312-
if suffix != "" {
313-
n, err = w.WriteString(suffix)
314-
written += n
315-
if err != nil {
316-
return written, err
317-
}
318-
}
319-
n, err = writeOpenMetricsLabelPairs(
320-
w, metric.Label, additionalLabelName, additionalLabelValue,
318+
written := 0
319+
n, err := writeOpenMetricsNameAndLabelPairs(
320+
w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue,
321321
)
322322
written += n
323323
if err != nil {
@@ -365,27 +365,58 @@ func writeOpenMetricsSample(
365365
return written, nil
366366
}
367367

368-
// writeOpenMetricsLabelPairs works like writeOpenMetrics but formats the float
369-
// in OpenMetrics style.
370-
func writeOpenMetricsLabelPairs(
368+
// writeOpenMetricsNameAndLabelPairs works like writeOpenMetricsSample but
369+
// formats the float in OpenMetrics style.
370+
func writeOpenMetricsNameAndLabelPairs(
371371
w enhancedWriter,
372+
name string,
372373
in []*dto.LabelPair,
373374
additionalLabelName string, additionalLabelValue float64,
374375
) (int, error) {
375-
if len(in) == 0 && additionalLabelName == "" {
376-
return 0, nil
377-
}
378376
var (
379-
written int
380-
separator byte = '{'
377+
written int
378+
separator byte = '{'
379+
metricInsideBraces = false
381380
)
381+
382+
if name != "" {
383+
// If the name does not pass the legacy validity check, we must put the
384+
// metric name inside the braces, quoted.
385+
if !model.IsValidLegacyMetricName(model.LabelValue(name)) {
386+
metricInsideBraces = true
387+
err := w.WriteByte(separator)
388+
written++
389+
if err != nil {
390+
return written, err
391+
}
392+
separator = ','
393+
}
394+
395+
n, err := writeName(w, name)
396+
written += n
397+
if err != nil {
398+
return written, err
399+
}
400+
}
401+
402+
if len(in) == 0 && additionalLabelName == "" {
403+
if metricInsideBraces {
404+
err := w.WriteByte('}')
405+
written++
406+
if err != nil {
407+
return written, err
408+
}
409+
}
410+
return written, nil
411+
}
412+
382413
for _, lp := range in {
383414
err := w.WriteByte(separator)
384415
written++
385416
if err != nil {
386417
return written, err
387418
}
388-
n, err := w.WriteString(lp.GetName())
419+
n, err := writeName(w, lp.GetName())
389420
written += n
390421
if err != nil {
391422
return written, err
@@ -451,7 +482,7 @@ func writeExemplar(w enhancedWriter, e *dto.Exemplar) (int, error) {
451482
if err != nil {
452483
return written, err
453484
}
454-
n, err = writeOpenMetricsLabelPairs(w, e.Label, "", 0)
485+
n, err = writeOpenMetricsNameAndLabelPairs(w, "", e.Label, "", 0)
455486
written += n
456487
if err != nil {
457488
return written, err

0 commit comments

Comments
 (0)