Skip to content

Commit 8968b6c

Browse files
authored
expfmt: Add UTF-8 syntax support in text_parse.go (#670)
Update expfmt/text_parse.go to support the new UTF-8 syntax

---------

Signed-off-by: Federico Torres <[email protected]>
1 parent cd4bcc0 commit 8968b6c

File tree

2 files changed

+381
-22
lines changed

2 files changed

+381
-22
lines changed

expfmt/text_parse.go

+131-21
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ import (
2222
"math"
2323
"strconv"
2424
"strings"
25+
"unicode/utf8"
2526

2627
dto "github.com/prometheus/client_model/go"
27-
2828
"google.golang.org/protobuf/proto"
2929

3030
"github.com/prometheus/common/model"
@@ -60,6 +60,7 @@ type TextParser struct {
6060
currentMF *dto.MetricFamily
6161
currentMetric *dto.Metric
6262
currentLabelPair *dto.LabelPair
63+
currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line.
6364

6465
// The remaining member variables are only used for summaries/histograms.
6566
currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
@@ -74,6 +75,7 @@ type TextParser struct {
7475
// count and sum of that summary/histogram.
7576
currentIsSummaryCount, currentIsSummarySum bool
7677
currentIsHistogramCount, currentIsHistogramSum bool
78+
currentMetricIsInsideBraces bool
7779
}
7880

7981
// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
@@ -137,12 +139,14 @@ func (p *TextParser) reset(in io.Reader) {
137139
}
138140
p.currentQuantile = math.NaN()
139141
p.currentBucket = math.NaN()
142+
p.currentMF = nil
140143
}
141144

142145
// startOfLine represents the state where the next byte read from p.buf is the
143146
// start of a line (or whitespace leading up to it).
144147
func (p *TextParser) startOfLine() stateFn {
145148
p.lineCount++
149+
p.currentMetricIsInsideBraces = false
146150
if p.skipBlankTab(); p.err != nil {
147151
// This is the only place that we expect to see io.EOF,
148152
// which is not an error but the signal that we are done.
@@ -158,6 +162,9 @@ func (p *TextParser) startOfLine() stateFn {
158162
return p.startComment
159163
case '\n':
160164
return p.startOfLine // Empty line, start the next one.
165+
case '{':
166+
p.currentMetricIsInsideBraces = true
167+
return p.readingLabels
161168
}
162169
return p.readingMetricName
163170
}
@@ -275,6 +282,8 @@ func (p *TextParser) startLabelName() stateFn {
275282
return nil // Unexpected end of input.
276283
}
277284
if p.currentByte == '}' {
285+
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
286+
p.currentLabelPairs = nil
278287
if p.skipBlankTab(); p.err != nil {
279288
return nil // Unexpected end of input.
280289
}
@@ -287,6 +296,38 @@ func (p *TextParser) startLabelName() stateFn {
287296
p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
288297
return nil
289298
}
299+
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
300+
return nil // Unexpected end of input.
301+
}
302+
if p.currentByte != '=' {
303+
if p.currentMetricIsInsideBraces {
304+
if p.currentMF != nil && p.currentMF.GetName() != p.currentToken.String() {
305+
p.parseError(fmt.Sprintf("multiple metric names %s %s", p.currentMF.GetName(), p.currentToken.String()))
306+
return nil
307+
}
308+
switch p.currentByte {
309+
case ',':
310+
p.setOrCreateCurrentMF()
311+
p.currentMetric = &dto.Metric{}
312+
return p.startLabelName
313+
case '}':
314+
p.setOrCreateCurrentMF()
315+
p.currentMetric = &dto.Metric{}
316+
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
317+
p.currentLabelPairs = nil
318+
if p.skipBlankTab(); p.err != nil {
319+
return nil // Unexpected end of input.
320+
}
321+
return p.readingValue
322+
default:
323+
p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte))
324+
return nil
325+
}
326+
}
327+
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
328+
p.currentLabelPairs = nil
329+
return nil
330+
}
290331
p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
291332
if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
292333
p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
@@ -296,23 +337,17 @@ func (p *TextParser) startLabelName() stateFn {
296337
// labels to 'real' labels.
297338
if !(p.currentMF.GetType() == dto.MetricType_SUMMARY && p.currentLabelPair.GetName() == model.QuantileLabel) &&
298339
!(p.currentMF.GetType() == dto.MetricType_HISTOGRAM && p.currentLabelPair.GetName() == model.BucketLabel) {
299-
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPair)
300-
}
301-
if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
302-
return nil // Unexpected end of input.
303-
}
304-
if p.currentByte != '=' {
305-
p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
306-
return nil
340+
p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair)
307341
}
308342
// Check for duplicate label names.
309343
labels := make(map[string]struct{})
310-
for _, l := range p.currentMetric.Label {
344+
for _, l := range p.currentLabelPairs {
311345
lName := l.GetName()
312346
if _, exists := labels[lName]; !exists {
313347
labels[lName] = struct{}{}
314348
} else {
315349
p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
350+
p.currentLabelPairs = nil
316351
return nil
317352
}
318353
}
@@ -345,6 +380,7 @@ func (p *TextParser) startLabelValue() stateFn {
345380
if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
346381
// Create a more helpful error message.
347382
p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
383+
p.currentLabelPairs = nil
348384
return nil
349385
}
350386
} else {
@@ -371,12 +407,19 @@ func (p *TextParser) startLabelValue() stateFn {
371407
return p.startLabelName
372408

373409
case '}':
410+
if p.currentMF == nil {
411+
p.parseError("invalid metric name")
412+
return nil
413+
}
414+
p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
415+
p.currentLabelPairs = nil
374416
if p.skipBlankTab(); p.err != nil {
375417
return nil // Unexpected end of input.
376418
}
377419
return p.readingValue
378420
default:
379421
p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
422+
p.currentLabelPairs = nil
380423
return nil
381424
}
382425
}
@@ -585,6 +628,8 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
585628
p.currentToken.WriteByte(p.currentByte)
586629
case 'n':
587630
p.currentToken.WriteByte('\n')
631+
case '"':
632+
p.currentToken.WriteByte('"')
588633
default:
589634
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
590635
return
@@ -610,13 +655,45 @@ func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
610655
// but not into p.currentToken.
611656
func (p *TextParser) readTokenAsMetricName() {
612657
p.currentToken.Reset()
658+
// A UTF-8 metric name must be quoted and may have escaped characters.
659+
quoted := false
660+
escaped := false
613661
if !isValidMetricNameStart(p.currentByte) {
614662
return
615663
}
616-
for {
617-
p.currentToken.WriteByte(p.currentByte)
664+
for p.err == nil {
665+
if escaped {
666+
switch p.currentByte {
667+
case '\\':
668+
p.currentToken.WriteByte(p.currentByte)
669+
case 'n':
670+
p.currentToken.WriteByte('\n')
671+
case '"':
672+
p.currentToken.WriteByte('"')
673+
default:
674+
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
675+
return
676+
}
677+
escaped = false
678+
} else {
679+
switch p.currentByte {
680+
case '"':
681+
quoted = !quoted
682+
if !quoted {
683+
p.currentByte, p.err = p.buf.ReadByte()
684+
return
685+
}
686+
case '\n':
687+
p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String()))
688+
return
689+
case '\\':
690+
escaped = true
691+
default:
692+
p.currentToken.WriteByte(p.currentByte)
693+
}
694+
}
618695
p.currentByte, p.err = p.buf.ReadByte()
619-
if p.err != nil || !isValidMetricNameContinuation(p.currentByte) {
696+
if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') {
620697
return
621698
}
622699
}
@@ -628,13 +705,45 @@ func (p *TextParser) readTokenAsMetricName() {
628705
// but not into p.currentToken.
629706
func (p *TextParser) readTokenAsLabelName() {
630707
p.currentToken.Reset()
708+
// A UTF-8 label name must be quoted and may have escaped characters.
709+
quoted := false
710+
escaped := false
631711
if !isValidLabelNameStart(p.currentByte) {
632712
return
633713
}
634-
for {
635-
p.currentToken.WriteByte(p.currentByte)
714+
for p.err == nil {
715+
if escaped {
716+
switch p.currentByte {
717+
case '\\':
718+
p.currentToken.WriteByte(p.currentByte)
719+
case 'n':
720+
p.currentToken.WriteByte('\n')
721+
case '"':
722+
p.currentToken.WriteByte('"')
723+
default:
724+
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
725+
return
726+
}
727+
escaped = false
728+
} else {
729+
switch p.currentByte {
730+
case '"':
731+
quoted = !quoted
732+
if !quoted {
733+
p.currentByte, p.err = p.buf.ReadByte()
734+
return
735+
}
736+
case '\n':
737+
p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String()))
738+
return
739+
case '\\':
740+
escaped = true
741+
default:
742+
p.currentToken.WriteByte(p.currentByte)
743+
}
744+
}
636745
p.currentByte, p.err = p.buf.ReadByte()
637-
if p.err != nil || !isValidLabelNameContinuation(p.currentByte) {
746+
if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') {
638747
return
639748
}
640749
}
@@ -660,6 +769,7 @@ func (p *TextParser) readTokenAsLabelValue() {
660769
p.currentToken.WriteByte('\n')
661770
default:
662771
p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
772+
p.currentLabelPairs = nil
663773
return
664774
}
665775
escaped = false
@@ -718,19 +828,19 @@ func (p *TextParser) setOrCreateCurrentMF() {
718828
}
719829

720830
func isValidLabelNameStart(b byte) bool {
721-
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_'
831+
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"'
722832
}
723833

724-
func isValidLabelNameContinuation(b byte) bool {
725-
return isValidLabelNameStart(b) || (b >= '0' && b <= '9')
834+
func isValidLabelNameContinuation(b byte, quoted bool) bool {
835+
return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b)))
726836
}
727837

728838
func isValidMetricNameStart(b byte) bool {
729839
return isValidLabelNameStart(b) || b == ':'
730840
}
731841

732-
func isValidMetricNameContinuation(b byte) bool {
733-
return isValidLabelNameContinuation(b) || b == ':'
842+
func isValidMetricNameContinuation(b byte, quoted bool) bool {
843+
return isValidLabelNameContinuation(b, quoted) || b == ':'
734844
}
735845

736846
func isBlankOrTab(b byte) bool {

0 commit comments

Comments
 (0)