Skip to content

Commit 1bdf1f3

Browse files
authored
Strict mode detects full scan on query (#6473)
1 parent 9d6a08e commit 1bdf1f3

2 files changed

Lines changed: 184 additions & 5 deletions

File tree

lib/collection/src/operations/verification/query.rs

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::operations::universal_query::collection_query::{
88
};
99

1010
impl Query {
11-
fn check_strict_mode(
11+
async fn check_strict_mode(
1212
&self,
1313
collection: &Collection,
1414
strict_mode_config: &StrictModeConfig,
@@ -35,6 +35,54 @@ impl Query {
3535
}
3636
Ok(())
3737
}
38+
39+
/// Check that the query does not perform a fullscan based on the collection configuration.
40+
async fn check_fullscan(
41+
&self,
42+
using: &str,
43+
collection: &Collection,
44+
strict_mode_config: &StrictModeConfig,
45+
) -> CollectionResult<()> {
46+
// Check only applies on `search_allow_exact`
47+
if strict_mode_config.search_allow_exact == Some(false) {
48+
match &self {
49+
Query::Fusion(_) | Query::OrderBy(_) | Query::Formula(_) | Query::Sample(_) => (),
50+
Query::Vector(_) => {
51+
let config = collection.collection_config.read().await;
52+
53+
// ignore sparse vectors
54+
let query_targets_sparse = config
55+
.params
56+
.sparse_vectors
57+
.as_ref()
58+
.is_some_and(|sparse| sparse.contains_key(using));
59+
if query_targets_sparse {
60+
// sparse vectors are always indexed
61+
return Ok(());
62+
}
63+
64+
// check HNSW configuration for vector
65+
let vector_hnsw_config = &config
66+
.params
67+
.vectors
68+
.get_params(using)
69+
.and_then(|param| param.hnsw_config.as_ref());
70+
71+
let vector_hnsw_m = vector_hnsw_config.and_then(|hnsw| hnsw.m);
72+
// TODO(strict-mode) also check payload_m if there is a filter by tenant/principal
73+
if vector_hnsw_m == Some(0) {
74+
return Err(CollectionError::strict_mode(
75+
format!(
76+
"Fullscan forbidden on '{using}' – vector indexing is disabled (hnsw_config.m = 0)"
77+
),
78+
"Enable vector indexing or use a prefetch query before rescoring",
79+
));
80+
}
81+
}
82+
}
83+
}
84+
Ok(())
85+
}
3886
}
3987

4088
impl StrictModeVerification for CollectionQueryRequest {
@@ -45,12 +93,22 @@ impl StrictModeVerification for CollectionQueryRequest {
4593
) -> CollectionResult<()> {
4694
// CollectionPrefetch.prefetch is of type CollectionPrefetch (recursive type)
4795
for prefetch in &self.prefetch {
48-
Box::pin(prefetch.check_strict_mode(collection, strict_mode_config)).await?;
96+
prefetch
97+
.check_strict_mode(collection, strict_mode_config)
98+
.await?;
4999
}
50100

51101
if let Some(query) = self.query.as_ref() {
102+
// check whether the query would perform a fullscan when it is not rescoring prefetch results
103+
if self.prefetch.is_empty() {
104+
query
105+
.check_fullscan(&self.using, collection, strict_mode_config)
106+
.await?;
107+
}
52108
// check for unindexed fields in formula
53-
query.check_strict_mode(collection, strict_mode_config)?
109+
query
110+
.check_strict_mode(collection, strict_mode_config)
111+
.await?
54112
}
55113

56114
Ok(())
@@ -89,8 +147,14 @@ impl StrictModeVerification for CollectionPrefetch {
89147
}
90148

91149
if let Some(query) = self.query.as_ref() {
150+
// check if prefetch can perform a fullscan
151+
query
152+
.check_fullscan(&self.using, collection, strict_mode_config)
153+
.await?;
92154
// check for unindexed fields in formula
93-
query.check_strict_mode(collection, strict_mode_config)?
155+
query
156+
.check_strict_mode(collection, strict_mode_config)
157+
.await?
94158
}
95159

96160
Ok(())
@@ -124,8 +188,16 @@ impl StrictModeVerification for CollectionQueryGroupsRequest {
124188
strict_mode_config: &StrictModeConfig,
125189
) -> CollectionResult<()> {
126190
if let Some(query) = self.query.as_ref() {
191+
// check whether the query would perform a fullscan when it is not rescoring prefetch results
192+
if self.prefetch.is_empty() {
193+
query
194+
.check_fullscan(&self.using, collection, strict_mode_config)
195+
.await?;
196+
}
127197
// check for unindexed fields in formula
128-
query.check_strict_mode(collection, strict_mode_config)?
198+
query
199+
.check_strict_mode(collection, strict_mode_config)
200+
.await?
129201
}
130202
// check for unindexed fields targeted by group_by
131203
check_grouping_field(&self.group_by, collection, strict_mode_config)?;

tests/openapi/test_strictmode.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,3 +1962,110 @@ def test_strict_mode_group_by_unindexed(collection_name):
19621962
)
19631963
assert response.ok
19641964

1965+
1966+
def test_strict_mode_full_scan(full_collection_name):
1967+
collection_name = full_collection_name
1968+
1969+
# disable HNSW index
1970+
response = request_with_validation(
1971+
api='/collections/{collection_name}',
1972+
method="PATCH",
1973+
path_params={'collection_name': collection_name},
1974+
body={
1975+
"vectors": {
1976+
"dense-multi": {
1977+
"hnsw_config": {
1978+
"m": 0,
1979+
},
1980+
},
1981+
}
1982+
}
1983+
)
1984+
assert response.ok
1985+
1986+
# full scan allowed
1987+
response = request_with_validation(
1988+
api='/collections/{collection_name}/points/query',
1989+
method="POST",
1990+
path_params={'collection_name': collection_name},
1991+
body={
1992+
"query": 2,
1993+
"using": "dense-multi",
1994+
"limit": 5
1995+
}
1996+
)
1997+
assert response.ok
1998+
1999+
# enable strict mode with search_allow_exact disabled
2000+
set_strict_mode(collection_name, {
2001+
"enabled": True,
2002+
"search_allow_exact": False
2003+
})
2004+
2005+
# full scan not allowed
2006+
response = request_with_validation(
2007+
api='/collections/{collection_name}/points/query',
2008+
method="POST",
2009+
path_params={'collection_name': collection_name},
2010+
body={
2011+
"query": 2,
2012+
"using": "dense-multi",
2013+
"limit": 5
2014+
}
2015+
)
2016+
assert not response.ok
2017+
assert "Fullscan forbidden on 'dense-multi' – vector indexing is disabled (hnsw_config.m = 0). Help: Enable vector indexing or use a prefetch query before rescoring" in response.json()['status']['error']
2018+
2019+
# sparse vector still works
2020+
response = request_with_validation(
2021+
api='/collections/{collection_name}/points/query',
2022+
method="POST",
2023+
path_params={'collection_name': collection_name},
2024+
body={
2025+
"query": 2,
2026+
"using": "sparse-text",
2027+
"limit": 5
2028+
}
2029+
)
2030+
assert response.ok
2031+
2032+
# Disabled HNSW is Ok for rescoring
2033+
response = request_with_validation(
2034+
api='/collections/{collection_name}/points/query',
2035+
method="POST",
2036+
path_params={'collection_name': collection_name},
2037+
body={
2038+
"prefetch": [
2039+
{
2040+
"query": 2,
2041+
"using": "sparse-text",
2042+
"limit": 50
2043+
}
2044+
],
2045+
"query": 2,
2046+
"using": "dense-multi",
2047+
"limit": 5
2048+
}
2049+
)
2050+
assert response.ok
2051+
2052+
# Disabled HNSW is forbidden in a prefetch
2053+
response = request_with_validation(
2054+
api='/collections/{collection_name}/points/query',
2055+
method="POST",
2056+
path_params={'collection_name': collection_name},
2057+
body={
2058+
"prefetch": [
2059+
{
2060+
"query": 2,
2061+
"using": "dense-multi",
2062+
"limit": 50
2063+
}
2064+
],
2065+
"query": 2,
2066+
"using": "sparse-text",
2067+
"limit": 5
2068+
}
2069+
)
2070+
assert not response.ok
2071+
assert "Fullscan forbidden on 'dense-multi' – vector indexing is disabled (hnsw_config.m = 0). Help: Enable vector indexing or use a prefetch query before rescoring" in response.json()['status']['error']

0 commit comments

Comments
 (0)