Skip to content

Commit af248be

Browse files
committed
Fix UnindexedField infra to handle lookup&range index requirements
1 parent 5ae56b3 commit af248be

3 files changed

Lines changed: 160 additions & 15 deletions

File tree

lib/collection/src/problems/unindexed_field.rs

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ impl<'a> IssueExtractor<'a> {
309309
pub struct Extractor<'a> {
310310
payload_schema: &'a HashMap<PayloadKeyType, PayloadFieldSchema>,
311311
unindexed_schema: HashMap<PayloadKeyType, Vec<PayloadFieldSchema>>,
312+
needs_match_index: bool,
313+
needs_range_index: bool,
312314
}
313315

314316
impl<'a> Extractor<'a> {
@@ -320,6 +322,8 @@ impl<'a> Extractor<'a> {
320322
let mut extractor = Self {
321323
payload_schema,
322324
unindexed_schema: HashMap::new(),
325+
needs_match_index: false,
326+
needs_range_index: false,
323327
};
324328

325329
extractor.update_from_filter(None, filter);
@@ -332,6 +336,8 @@ impl<'a> Extractor<'a> {
332336
Self {
333337
payload_schema,
334338
unindexed_schema: HashMap::new(),
339+
needs_match_index: false,
340+
needs_range_index: false,
335341
}
336342
}
337343

@@ -364,6 +370,8 @@ impl<'a> Extractor<'a> {
364370
match condition {
365371
Condition::Field(field_condition) => {
366372
key = &field_condition.key;
373+
self.update_needs_match(field_condition.r#match.is_some());
374+
self.update_needs_range(field_condition.range.is_some());
367375
inferred = infer_schema_from_field_condition(field_condition);
368376
}
369377
Condition::Filter(filter) => {
@@ -402,25 +410,36 @@ impl<'a> Extractor<'a> {
402410
}
403411
}
404412

413+
/// Capture that an index with Match capability is required.
414+
/// Does not toggle off if has been set once already.
415+
fn update_needs_match(&mut self, is_match: bool) {
416+
if !self.needs_match_index {
417+
self.needs_match_index = is_match
418+
}
419+
}
420+
421+
/// Capture that an index with Match capability is required.
422+
/// Does not toggle off if has been set once already.
423+
fn update_needs_range(&mut self, is_range: bool) {
424+
if !self.needs_range_index {
425+
self.needs_range_index = is_range
426+
}
427+
}
428+
405429
fn needs_index(&self, key: &JsonPath, inferred: &[PayloadFieldSchema]) -> bool {
406430
match self.payload_schema.get(key) {
407431
Some(index_info) => {
408-
let index_info_kind = index_info.kind();
432+
if self.needs_match_index && !index_info.supports_match() {
433+
return true;
434+
}
409435

436+
if self.needs_range_index && !index_info.supports_range() {
437+
return true;
438+
}
439+
440+
let index_info_kind = index_info.kind();
410441
let already_indexed = inferred
411442
.iter()
412-
// TODO(strict-mode):
413-
// Use better comparisons for parametrized indexes. An idea is to make the inferring step
414-
// also output valid parametrized indexes and compare those instead of just the kind (index type)
415-
//
416-
// The only reason why it would be needed is because integer index can be parametrized
417-
// with just lookup or just range, so it is possible to make a false negative here. E.g.
418-
//
419-
// condition: MatchValue
420-
// inferred: FieldType(Integer)
421-
// index_info: FieldParams(IntegerIndex(range))
422-
//
423-
// In this case, we would assume that the field is indexed correctly when it is not
424443
.map(PayloadFieldSchema::kind)
425444
.any(|inferred| inferred == index_info_kind);
426445

lib/segment/src/types.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,6 +1898,34 @@ impl PayloadFieldSchema {
18981898
},
18991899
}
19001900
}
1901+
1902+
/// Check if this type supports a `range` condition
1903+
pub fn supports_range(&self) -> bool {
1904+
match self {
1905+
PayloadFieldSchema::FieldType(payload_schema_type) => match payload_schema_type {
1906+
PayloadSchemaType::Float => true,
1907+
PayloadSchemaType::Integer => true,
1908+
PayloadSchemaType::Keyword => false,
1909+
PayloadSchemaType::Uuid => false,
1910+
PayloadSchemaType::Bool => false,
1911+
PayloadSchemaType::Geo => false,
1912+
PayloadSchemaType::Text => false,
1913+
PayloadSchemaType::Datetime => false,
1914+
},
1915+
PayloadFieldSchema::FieldParams(payload_schema_params) => match payload_schema_params {
1916+
PayloadSchemaParams::Float(_) => true,
1917+
PayloadSchemaParams::Integer(integer_index_params) => {
1918+
integer_index_params.range == Some(true)
1919+
}
1920+
PayloadSchemaParams::Keyword(_) => false,
1921+
PayloadSchemaParams::Uuid(_) => false,
1922+
PayloadSchemaParams::Bool(_) => false,
1923+
PayloadSchemaParams::Geo(_) => false,
1924+
PayloadSchemaParams::Text(_) => false,
1925+
PayloadSchemaParams::Datetime(_) => false,
1926+
},
1927+
}
1928+
}
19011929
}
19021930

19031931
impl From<PayloadSchemaType> for PayloadFieldSchema {

tests/openapi/test_strictmode.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def search_request_with_timeout(timeout):
162162
assert not search_fail.ok
163163

164164

165-
def test_strict_mode_unindexed_filter_read_validation(collection_name):
165+
def test_strict_mode_unindexed_filter_keyword_read_validation(collection_name):
166166
def search_request_with_filter():
167167
return request_with_validation(
168168
api='/collections/{collection_name}/points/search',
@@ -217,6 +217,84 @@ def search_request_with_filter():
217217
search_request_with_filter().raise_for_status()
218218

219219

220+
def test_strict_mode_unindexed_filter_integer_read_validation(collection_name):
221+
def search_request_with_filter():
222+
return request_with_validation(
223+
api='/collections/{collection_name}/points/search',
224+
method="POST",
225+
path_params={'collection_name': collection_name},
226+
body={
227+
"vector": [0.2, 0.1, 0.9, 0.7],
228+
"limit": 3,
229+
"filter": {
230+
"must": [
231+
{
232+
"key": "count",
233+
"match": {
234+
"value": 1
235+
}
236+
}
237+
]
238+
},
239+
}
240+
)
241+
242+
search_request_with_filter().raise_for_status()
243+
244+
set_strict_mode(collection_name, {
245+
"enabled": True,
246+
"unindexed_filtering_retrieve": True,
247+
})
248+
249+
search_request_with_filter().raise_for_status()
250+
251+
set_strict_mode(collection_name, {
252+
"unindexed_filtering_retrieve": False,
253+
})
254+
255+
search_fail = search_request_with_filter()
256+
257+
assert "count" in search_fail.json()['status']['error']
258+
assert not search_fail.ok
259+
260+
request_with_validation(
261+
api='/collections/{collection_name}/index',
262+
method="PUT",
263+
path_params={'collection_name': collection_name},
264+
query_params={'wait': 'true'},
265+
body={
266+
"field_name": "count",
267+
"field_schema": {
268+
"type": "integer",
269+
"lookup": False,
270+
"range": True,
271+
}
272+
}
273+
).raise_for_status()
274+
275+
# the integer index does not support `lookup` on our match condition
276+
assert "count" in search_fail.json()['status']['error']
277+
assert not search_fail.ok
278+
279+
request_with_validation(
280+
api='/collections/{collection_name}/index',
281+
method="PUT",
282+
path_params={'collection_name': collection_name},
283+
query_params={'wait': 'true'},
284+
body={
285+
"field_name": "count",
286+
"field_schema": {
287+
"type": "integer",
288+
"lookup": True,
289+
"range": True,
290+
}
291+
}
292+
).raise_for_status()
293+
294+
# We created an index with `lookup` on this field so it should work now
295+
search_request_with_filter().raise_for_status()
296+
297+
220298
def test_strict_mode_unindexed_filter_write_validation(collection_name):
221299
def update_request_with_filter():
222300
return request_with_validation(
@@ -1013,7 +1091,7 @@ def upsert_points(ids: list[int]):
10131091
})
10141092

10151093
for i in range(32):
1016-
print(upsert_points([3, 4]).json())
1094+
upsert_points([3, 4]).json()
10171095

10181096
# Max limit has been reached and one of the next requests must fail. Due to cache it might not be the first call!
10191097
for i in range(32):
@@ -1368,6 +1446,26 @@ def query_request():
13681446
assert "discount_price" in query_fail.json()['status']['error']
13691447
assert "formula expression" in query_fail.json()['status']['error']
13701448

1449+
# Create index on `discount_price`
1450+
request_with_validation(
1451+
api='/collections/{collection_name}/index',
1452+
method="PUT",
1453+
path_params={'collection_name': collection_name},
1454+
query_params={'wait': 'true'},
1455+
body={
1456+
"field_name": "discount_price",
1457+
"field_schema": {
1458+
"type": "integer",
1459+
"lookup": True,
1460+
"range": False,
1461+
}
1462+
}
1463+
).raise_for_status()
1464+
1465+
# Query succeeds with the index
1466+
query_ok = query_request()
1467+
assert query_ok.ok
1468+
13711469

13721470
def test_strict_mode_read_rate_limiting_small_replenish(collection_name):
13731471
"""

0 commit comments

Comments
 (0)