Skip to content

Commit 6962b12

Browse files
committed
Strict mode detects full scan on query
1 parent d177fd5 commit 6962b12

2 files changed

Lines changed: 184 additions & 5 deletions

File tree

lib/collection/src/operations/verification/query.rs

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::operations::universal_query::collection_query::{
88
};
99

1010
impl Query {
11-
fn check_strict_mode(
11+
async fn check_strict_mode(
1212
&self,
1313
collection: &Collection,
1414
strict_mode_config: &StrictModeConfig,
@@ -35,6 +35,54 @@ impl Query {
3535
}
3636
Ok(())
3737
}
38+
39+
/// Check that the query does not perform a fullscan based on the collection configuration.
40+
async fn check_fullscan(
41+
&self,
42+
using: &str,
43+
collection: &Collection,
44+
strict_mode_config: &StrictModeConfig,
45+
) -> CollectionResult<()> {
46+
// Check only applies when `search_allow_exact` is explicitly set to `false`
47+
if strict_mode_config.search_allow_exact == Some(false) {
48+
match &self {
49+
Query::Fusion(_) | Query::OrderBy(_) | Query::Formula(_) | Query::Sample(_) => (),
50+
Query::Vector(_) => {
51+
let config = collection.collection_config.read().await;
52+
53+
// ignore sparse vectors
54+
let query_targets_sparse = config
55+
.params
56+
.sparse_vectors
57+
.as_ref()
58+
.is_some_and(|sparse| sparse.contains_key(using));
59+
if query_targets_sparse {
60+
// sparse vectors are always indexed
61+
return Ok(());
62+
}
63+
64+
// check HNSW configuration for vector
65+
let vector_hnsw_config = &config
66+
.params
67+
.vectors
68+
.get_params(using)
69+
.and_then(|param| param.hnsw_config.as_ref());
70+
71+
let vector_hnsw_m = vector_hnsw_config.and_then(|hnsw| hnsw.m);
72+
// TODO(strict-mode) also check payload_m if there is a filter by tenant/principal
73+
if vector_hnsw_m == Some(0) {
74+
return Err(CollectionError::strict_mode(
75+
format!(
76+
"Fullscan forbidden on '{using}' – vector indexing is disabled (hnsw_config.m = 0)"
77+
),
78+
"Enable vector indexing or use a prefetch query before rescoring",
79+
));
80+
}
81+
}
82+
}
83+
}
84+
Ok(())
85+
}
3886
}
3987

4088
impl StrictModeVerification for CollectionQueryRequest {
@@ -45,12 +93,22 @@ impl StrictModeVerification for CollectionQueryRequest {
4593
) -> CollectionResult<()> {
4694
// CollectionPrefetch.prefetch is of type CollectionPrefetch (recursive type)
4795
for prefetch in &self.prefetch {
48-
Box::pin(prefetch.check_strict_mode(collection, strict_mode_config)).await?;
96+
prefetch
97+
.check_strict_mode(collection, strict_mode_config)
98+
.await?;
4999
}
50100

51101
if let Some(query) = self.query.as_ref() {
102+
// check that the query does not perform a fullscan (skipped when it rescores prefetch results)
103+
if self.prefetch.is_empty() {
104+
query
105+
.check_fullscan(&self.using, collection, strict_mode_config)
106+
.await?;
107+
}
52108
// check for unindexed fields in formula
53-
query.check_strict_mode(collection, strict_mode_config)?
109+
query
110+
.check_strict_mode(collection, strict_mode_config)
111+
.await?
54112
}
55113

56114
Ok(())
@@ -89,8 +147,14 @@ impl StrictModeVerification for CollectionPrefetch {
89147
}
90148

91149
if let Some(query) = self.query.as_ref() {
150+
// check if prefetch can perform a fullscan
151+
query
152+
.check_fullscan(&self.using, collection, strict_mode_config)
153+
.await?;
92154
// check for unindexed fields in formula
93-
query.check_strict_mode(collection, strict_mode_config)?
155+
query
156+
.check_strict_mode(collection, strict_mode_config)
157+
.await?
94158
}
95159

96160
Ok(())
@@ -124,8 +188,16 @@ impl StrictModeVerification for CollectionQueryGroupsRequest {
124188
strict_mode_config: &StrictModeConfig,
125189
) -> CollectionResult<()> {
126190
if let Some(query) = self.query.as_ref() {
191+
// check that the query does not perform a fullscan (skipped when it rescores prefetch results)
192+
if self.prefetch.is_empty() {
193+
query
194+
.check_fullscan(&self.using, collection, strict_mode_config)
195+
.await?;
196+
}
127197
// check for unindexed fields in formula
128-
query.check_strict_mode(collection, strict_mode_config)?
198+
query
199+
.check_strict_mode(collection, strict_mode_config)
200+
.await?
129201
}
130202
Ok(())
131203
}

tests/openapi/test_strictmode.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,3 +1809,110 @@ def check_multivector_query_id(should_succeed: bool):
18091809
# + 1 of the filter for not including the id
18101810
})
18111811
check_multivector_query_id(should_succeed=True)
1812+
1813+
def test_strict_mode_full_scan(full_collection_name):
1814+
collection_name = full_collection_name
1815+
1816+
# disable HNSW index
1817+
response = request_with_validation(
1818+
api='/collections/{collection_name}',
1819+
method="PATCH",
1820+
path_params={'collection_name': collection_name},
1821+
body={
1822+
"vectors": {
1823+
"dense-multi": {
1824+
"hnsw_config": {
1825+
"m": 0,
1826+
},
1827+
},
1828+
}
1829+
}
1830+
)
1831+
assert response.ok
1832+
1833+
# full scan allowed
1834+
response = request_with_validation(
1835+
api='/collections/{collection_name}/points/query',
1836+
method="POST",
1837+
path_params={'collection_name': collection_name},
1838+
body={
1839+
"query": 2,
1840+
"using": "dense-multi",
1841+
"limit": 5
1842+
}
1843+
)
1844+
assert response.ok
1845+
1846+
# enable strict mode with search_allow_exact
1847+
set_strict_mode(collection_name, {
1848+
"enabled": True,
1849+
"search_allow_exact": False
1850+
})
1851+
1852+
# full scan not allowed
1853+
response = request_with_validation(
1854+
api='/collections/{collection_name}/points/query',
1855+
method="POST",
1856+
path_params={'collection_name': collection_name},
1857+
body={
1858+
"query": 2,
1859+
"using": "dense-multi",
1860+
"limit": 5
1861+
}
1862+
)
1863+
assert not response.ok
1864+
assert "Fullscan forbidden on 'dense-multi' – vector indexing is disabled (hnsw_config.m = 0). Help: Enable vector indexing or use a prefetch query before rescoring" in response.json()['status']['error']
1865+
1866+
# sparse vector still works
1867+
response = request_with_validation(
1868+
api='/collections/{collection_name}/points/query',
1869+
method="POST",
1870+
path_params={'collection_name': collection_name},
1871+
body={
1872+
"query": 2,
1873+
"using": "sparse-text",
1874+
"limit": 5
1875+
}
1876+
)
1877+
assert response.ok
1878+
1879+
# Disabled HNSW is Ok for rescoring
1880+
response = request_with_validation(
1881+
api='/collections/{collection_name}/points/query',
1882+
method="POST",
1883+
path_params={'collection_name': collection_name},
1884+
body={
1885+
"prefetch": [
1886+
{
1887+
"query": 2,
1888+
"using": "sparse-text",
1889+
"limit": 50
1890+
}
1891+
],
1892+
"query": 2,
1893+
"using": "dense-multi",
1894+
"limit": 5
1895+
}
1896+
)
1897+
assert response.ok
1898+
1899+
# Disabled HNSW is forbidden in a prefetch
1900+
response = request_with_validation(
1901+
api='/collections/{collection_name}/points/query',
1902+
method="POST",
1903+
path_params={'collection_name': collection_name},
1904+
body={
1905+
"prefetch": [
1906+
{
1907+
"query": 2,
1908+
"using": "dense-multi",
1909+
"limit": 50
1910+
}
1911+
],
1912+
"query": 2,
1913+
"using": "sparse-text",
1914+
"limit": 5
1915+
}
1916+
)
1917+
assert not response.ok
1918+
assert "Fullscan forbidden on 'dense-multi' – vector indexing is disabled (hnsw_config.m = 0). Help: Enable vector indexing or use a prefetch query before rescoring" in response.json()['status']['error']

0 commit comments

Comments
 (0)