Skip to content

Commit 4dfcf60

Browse files
committed
filters: supporting alternative characters for quote
Regular expressions often contain characters that are awkward to quote correctly in practice. In some scenarios a double quote works well, but it can hurt the readability of certain regexps. This introduces a quoting mode that treats `/` and `|` as a double quote, to make quoting in regular expressions more familiar and readable. The change is backwards compatible, so existing regexp quoting is not affected. Signed-off-by: Stephen J Day <[email protected]>
1 parent 617c63d commit 4dfcf60

5 files changed

Lines changed: 343 additions & 11 deletions

File tree

filters/filter_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ func TestFilters(t *testing.T) {
135135
corpus[8],
136136
},
137137
},
138+
{
139+
name: "LabelValueNoAltQuoting",
140+
input: "labels.|foo|==omg_asdf.asdf-qwer",
141+
errString: "filters: parse error: [labels. >|||< foo|==omg_asdf.asdf-qwer]: invalid quote encountered",
142+
},
138143
{
139144
name: "Name",
140145
input: "name==bar",
@@ -178,6 +183,27 @@ func TestFilters(t *testing.T) {
178183
corpus[7],
179184
},
180185
},
186+
{
187+
name: "RegexpQuotedValue",
188+
input: "other~=/[abc]+/,name!=foo",
189+
expected: []interface{}{
190+
corpus[6],
191+
corpus[7],
192+
},
193+
},
194+
{
195+
name: "RegexpQuotedValue",
196+
input: "other~=/[abc]{1,2}/,name!=foo",
197+
expected: []interface{}{
198+
corpus[6],
199+
corpus[7],
200+
},
201+
},
202+
{
203+
name: "RegexpQuotedValueGarbage",
204+
input: "other~=/[abc]{0,1}\"\\//,name!=foo",
205+
// valid syntax, but doesn't match anything
206+
},
181207
{
182208
name: "NameAndLabelValue",
183209
input: "name==bar,labels.bar==true",

filters/parser.go

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package filters
33
import (
44
"fmt"
55
"io"
6-
"strconv"
76

87
"github.com/containerd/containerd/errdefs"
98
"github.com/pkg/errors"
@@ -134,7 +133,12 @@ func (p *parser) selector() (selector, error) {
134133
return selector{}, err
135134
}
136135

137-
value, err := p.value()
136+
var allowAltQuotes bool
137+
if op == operatorMatches {
138+
allowAltQuotes = true
139+
}
140+
141+
value, err := p.value(allowAltQuotes)
138142
if err != nil {
139143
if err == io.EOF {
140144
return selector{}, io.ErrUnexpectedEOF
@@ -188,7 +192,7 @@ func (p *parser) field() (string, error) {
188192
case tokenField:
189193
return s, nil
190194
case tokenQuoted:
191-
return p.unquote(pos, s)
195+
return p.unquote(pos, s, false)
192196
}
193197

194198
return "", p.mkerr(pos, "expected field or quoted")
@@ -213,21 +217,25 @@ func (p *parser) operator() (operator, error) {
213217
return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`)
214218
}
215219

216-
func (p *parser) value() (string, error) {
220+
func (p *parser) value(allowAltQuotes bool) (string, error) {
217221
pos, tok, s := p.scanner.scan()
218222

219223
switch tok {
220224
case tokenValue, tokenField:
221225
return s, nil
222226
case tokenQuoted:
223-
return p.unquote(pos, s)
227+
return p.unquote(pos, s, allowAltQuotes)
224228
}
225229

226230
return "", p.mkerr(pos, "expected value or quoted")
227231
}
228232

229-
func (p *parser) unquote(pos int, s string) (string, error) {
230-
uq, err := strconv.Unquote(s)
233+
func (p *parser) unquote(pos int, s string, allowAlts bool) (string, error) {
234+
if !allowAlts && s[0] != '\'' && s[0] != '"' {
235+
return "", p.mkerr(pos, "invalid quote encountered")
236+
}
237+
238+
uq, err := unquote(s)
231239
if err != nil {
232240
return "", p.mkerr(pos, "unquoting failed: %v", err)
233241
}

filters/quote.go

Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
package filters
2+
3+
import (
4+
"unicode/utf8"
5+
6+
"github.com/pkg/errors"
7+
)
8+
9+
// NOTE(stevvooe): Most of the code in this file is copied from the stdlib
10+
// strconv package and modified to be able to handle quoting with `/` and `|`
11+
// as delimiters. The copyright is held by the Go authors.
12+
13+
var errQuoteSyntax = errors.New("quote syntax error")
14+
15+
// UnquoteChar decodes the first character or byte in the escaped string
16+
// or character literal represented by the string s.
17+
// It returns four values:
18+
//
19+
// 1) value, the decoded Unicode code point or byte value;
20+
// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
21+
// 3) tail, the remainder of the string after the character; and
22+
// 4) an error that will be nil if the character is syntactically valid.
23+
//
24+
// The second argument, quote, specifies the type of literal being parsed
25+
// and therefore which escaped quote character is permitted.
26+
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
27+
// If set to a double quote, it permits \" and disallows unescaped ".
28+
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
29+
//
30+
// This is from Go strconv package, modified to support `|` and `/` as double
31+
// quotes for use with regular expressions.
32+
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
33+
// easy cases
34+
switch c := s[0]; {
35+
case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'):
36+
err = errQuoteSyntax
37+
return
38+
case c >= utf8.RuneSelf:
39+
r, size := utf8.DecodeRuneInString(s)
40+
return r, true, s[size:], nil
41+
case c != '\\':
42+
return rune(s[0]), false, s[1:], nil
43+
}
44+
45+
// hard case: c is backslash
46+
if len(s) <= 1 {
47+
err = errQuoteSyntax
48+
return
49+
}
50+
c := s[1]
51+
s = s[2:]
52+
53+
switch c {
54+
case 'a':
55+
value = '\a'
56+
case 'b':
57+
value = '\b'
58+
case 'f':
59+
value = '\f'
60+
case 'n':
61+
value = '\n'
62+
case 'r':
63+
value = '\r'
64+
case 't':
65+
value = '\t'
66+
case 'v':
67+
value = '\v'
68+
case 'x', 'u', 'U':
69+
n := 0
70+
switch c {
71+
case 'x':
72+
n = 2
73+
case 'u':
74+
n = 4
75+
case 'U':
76+
n = 8
77+
}
78+
var v rune
79+
if len(s) < n {
80+
err = errQuoteSyntax
81+
return
82+
}
83+
for j := 0; j < n; j++ {
84+
x, ok := unhex(s[j])
85+
if !ok {
86+
err = errQuoteSyntax
87+
return
88+
}
89+
v = v<<4 | x
90+
}
91+
s = s[n:]
92+
if c == 'x' {
93+
// single-byte string, possibly not UTF-8
94+
value = v
95+
break
96+
}
97+
if v > utf8.MaxRune {
98+
err = errQuoteSyntax
99+
return
100+
}
101+
value = v
102+
multibyte = true
103+
case '0', '1', '2', '3', '4', '5', '6', '7':
104+
v := rune(c) - '0'
105+
if len(s) < 2 {
106+
err = errQuoteSyntax
107+
return
108+
}
109+
for j := 0; j < 2; j++ { // one digit already; two more
110+
x := rune(s[j]) - '0'
111+
if x < 0 || x > 7 {
112+
err = errQuoteSyntax
113+
return
114+
}
115+
v = (v << 3) | x
116+
}
117+
s = s[2:]
118+
if v > 255 {
119+
err = errQuoteSyntax
120+
return
121+
}
122+
value = v
123+
case '\\':
124+
value = '\\'
125+
case '\'', '"', '|', '/':
126+
if c != quote {
127+
err = errQuoteSyntax
128+
return
129+
}
130+
value = rune(c)
131+
default:
132+
err = errQuoteSyntax
133+
return
134+
}
135+
tail = s
136+
return
137+
}
138+
139+
// unquote interprets s as a single-quoted, double-quoted,
140+
// or backquoted Go string literal, returning the string value
141+
// that s quotes. (If s is single-quoted, it would be a Go
142+
// character literal; Unquote returns the corresponding
143+
// one-character string.)
144+
//
145+
// This is modified from the standard library to support `|` and `/` as quote
146+
// characters for use with regular expressions.
147+
func unquote(s string) (string, error) {
148+
n := len(s)
149+
if n < 2 {
150+
return "", errQuoteSyntax
151+
}
152+
quote := s[0]
153+
if quote != s[n-1] {
154+
return "", errQuoteSyntax
155+
}
156+
s = s[1 : n-1]
157+
158+
if quote == '`' {
159+
if contains(s, '`') {
160+
return "", errQuoteSyntax
161+
}
162+
if contains(s, '\r') {
163+
// -1 because we know there is at least one \r to remove.
164+
buf := make([]byte, 0, len(s)-1)
165+
for i := 0; i < len(s); i++ {
166+
if s[i] != '\r' {
167+
buf = append(buf, s[i])
168+
}
169+
}
170+
return string(buf), nil
171+
}
172+
return s, nil
173+
}
174+
if quote != '"' && quote != '\'' && quote != '|' && quote != '/' {
175+
return "", errQuoteSyntax
176+
}
177+
if contains(s, '\n') {
178+
return "", errQuoteSyntax
179+
}
180+
181+
// Is it trivial? Avoid allocation.
182+
if !contains(s, '\\') && !contains(s, quote) {
183+
switch quote {
184+
case '"', '/', '|': // pipe and slash are treated like double quote
185+
return s, nil
186+
case '\'':
187+
r, size := utf8.DecodeRuneInString(s)
188+
if size == len(s) && (r != utf8.RuneError || size != 1) {
189+
return s, nil
190+
}
191+
}
192+
}
193+
194+
var runeTmp [utf8.UTFMax]byte
195+
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
196+
for len(s) > 0 {
197+
c, multibyte, ss, err := unquoteChar(s, quote)
198+
if err != nil {
199+
return "", err
200+
}
201+
s = ss
202+
if c < utf8.RuneSelf || !multibyte {
203+
buf = append(buf, byte(c))
204+
} else {
205+
n := utf8.EncodeRune(runeTmp[:], c)
206+
buf = append(buf, runeTmp[:n]...)
207+
}
208+
if quote == '\'' && len(s) != 0 {
209+
// single-quoted must be single character
210+
return "", errQuoteSyntax
211+
}
212+
}
213+
return string(buf), nil
214+
}
215+
216+
// contains reports whether any byte of s equals c. The comparison is made
// byte by byte, not rune by rune.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
225+
226+
// unhex converts a single ASCII hexadecimal digit (0-9, a-f, A-F) to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	digit := rune(b)
	if digit >= '0' && digit <= '9' {
		return digit - '0', true
	}
	if digit >= 'a' && digit <= 'f' {
		return digit - 'a' + 10, true
	}
	if digit >= 'A' && digit <= 'F' {
		return digit - 'A' + 10, true
	}
	return 0, false
}

filters/scanner.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func (s *scanner) peek() rune {
8787
return ch
8888
}
8989

90-
func (s *scanner) scan() (int, token, string) {
90+
func (s *scanner) scan() (nextp int, tk token, text string) {
9191
var (
9292
ch = s.next()
9393
pos = s.pos
@@ -101,6 +101,7 @@ chomp:
101101
s.scanQuoted(ch)
102102
return pos, tokenQuoted, s.input[pos:s.ppos]
103103
case isSeparatorRune(ch):
104+
s.value = false
104105
return pos, tokenSeparator, s.input[pos:s.ppos]
105106
case isOperatorRune(ch):
106107
s.scanOperator()
@@ -241,7 +242,7 @@ func isOperatorRune(r rune) bool {
241242

242243
func isQuoteRune(r rune) bool {
243244
switch r {
244-
case '"': // maybe add single quoting?
245+
case '/', '|', '"': // maybe add single quoting?
245246
return true
246247
}
247248

0 commit comments

Comments
 (0)