Skip to content

Commit 0ceb2c4

Browse files
committed
check for unindexed fields in formula
1 parent 292370d commit 0ceb2c4

3 files changed

Lines changed: 110 additions & 10 deletions

File tree

lib/collection/src/collection/payload_index_schema.rs

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize};
88

99
use crate::collection::Collection;
1010
use crate::operations::types::{CollectionResult, UpdateResult};
11+
use crate::operations::universal_query::formula::ExpressionInternal;
1112
use crate::operations::{CollectionUpdateOperations, CreateIndex, FieldIndexOperations};
1213
use crate::problems::unindexed_field;
1314
use crate::save_on_disk::SaveOnDisk;
@@ -103,21 +104,44 @@ impl Collection {
103104
&self,
104105
filter: &Filter,
105106
) -> Option<(JsonPath, Vec<PayloadFieldSchema>)> {
106-
self.payload_index_schema.read().one_unindexed_key(filter)
107+
self.payload_index_schema
108+
.read()
109+
.one_unindexed_filter_key(filter)
107110
}
111+
112+
pub fn one_unindexed_expression_key(
113+
&self,
114+
expr: &ExpressionInternal,
115+
) -> Option<(JsonPath, Vec<PayloadFieldSchema>)> {
116+
self.payload_index_schema
117+
.read()
118+
.one_unindexed_expression_key(expr)
119+
}
120+
}
121+
122+
enum PotentiallyUnindexed<'a> {
123+
Filter(&'a Filter),
124+
Expression(&'a ExpressionInternal),
108125
}
109126

110127
impl PayloadIndexSchema {
111128
/// Returns an arbitrary payload key with acceptable schemas
112129
/// used by `filter` which can be indexed but currently is not.
113130
/// If this function returns `None` all indexable keys in `filter` are indexed.
114-
pub fn one_unindexed_key(
131+
fn one_unindexed_key(
115132
&self,
116-
filter: &Filter,
133+
suspect: PotentiallyUnindexed<'_>,
117134
) -> Option<(JsonPath, Vec<PayloadFieldSchema>)> {
118135
let mut extractor = unindexed_field::Extractor::new(&self.schema);
119136

120-
extractor.update_from_filter_once(None, filter);
137+
match suspect {
138+
PotentiallyUnindexed::Filter(filter) => {
139+
extractor.update_from_filter_once(None, filter);
140+
}
141+
PotentiallyUnindexed::Expression(expression) => {
142+
extractor.update_from_expression(expression);
143+
}
144+
}
121145

122146
// Get the first unindexed field from the extractor.
123147
extractor
@@ -126,4 +150,21 @@ impl PayloadIndexSchema {
126150
.next()
127151
.map(|(key, schema)| (key.clone(), schema.clone()))
128152
}
153+
154+
/// Returns an arbitrary payload key with acceptable schemas
155+
/// used by `filter` which can be indexed but currently is not.
156+
/// If this function returns `None` all indexable keys in `filter` are indexed.
157+
pub fn one_unindexed_filter_key(
158+
&self,
159+
filter: &Filter,
160+
) -> Option<(JsonPath, Vec<PayloadFieldSchema>)> {
161+
self.one_unindexed_key(PotentiallyUnindexed::Filter(filter))
162+
}
163+
164+
pub fn one_unindexed_expression_key(
165+
&self,
166+
expr: &ExpressionInternal,
167+
) -> Option<(JsonPath, Vec<PayloadFieldSchema>)> {
168+
self.one_unindexed_key(PotentiallyUnindexed::Expression(expr))
169+
}
129170
}

lib/collection/src/operations/verification/query.rs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ use segment::types::StrictModeConfig;
33

44
use super::StrictModeVerification;
55
use crate::collection::Collection;
6+
use crate::operations::types::{CollectionError, CollectionResult};
67
use crate::operations::universal_query::collection_query::{
7-
CollectionQueryGroupsRequest, CollectionQueryRequest,
8+
CollectionQueryGroupsRequest, CollectionQueryRequest, Query,
89
};
910

1011
impl StrictModeVerification for QueryRequestInternal {
@@ -105,6 +106,35 @@ impl StrictModeVerification for QueryGroupsRequestInternal {
105106
}
106107

107108
impl StrictModeVerification for CollectionQueryRequest {
109+
async fn check_custom(
110+
&self,
111+
collection: &Collection,
112+
strict_mode_config: &StrictModeConfig,
113+
) -> CollectionResult<()> {
114+
// check for unindexed fields in formula
115+
if strict_mode_config.unindexed_filtering_retrieve == Some(false) {
116+
if let Some(Query::Formula(formula)) = self.query.as_ref() {
117+
if let Some((key, schemas)) =
118+
collection.one_unindexed_expression_key(&formula.formula)
119+
{
120+
let possible_schemas_str = schemas
121+
.iter()
122+
.map(|schema| schema.to_string())
123+
.collect::<Vec<_>>()
124+
.join(", ");
125+
126+
return Err(CollectionError::strict_mode(
127+
format!(
128+
"Index required but not found for \"{key}\" of one of the following types: [{possible_schemas_str}]",
129+
),
130+
"Create an index for this key or use a different expression.",
131+
));
132+
}
133+
}
134+
}
135+
Ok(())
136+
}
137+
108138
fn query_limit(&self) -> Option<usize> {
109139
Some(self.limit)
110140
}
@@ -127,6 +157,35 @@ impl StrictModeVerification for CollectionQueryRequest {
127157
}
128158

129159
impl StrictModeVerification for CollectionQueryGroupsRequest {
160+
// check for unindexed fields in formula
161+
async fn check_custom(
162+
&self,
163+
collection: &Collection,
164+
strict_mode_config: &StrictModeConfig,
165+
) -> CollectionResult<()> {
166+
if strict_mode_config.unindexed_filtering_retrieve == Some(false) {
167+
if let Some(Query::Formula(formula)) = self.query.as_ref() {
168+
if let Some((key, schemas)) =
169+
collection.one_unindexed_expression_key(&formula.formula)
170+
{
171+
let possible_schemas_str = schemas
172+
.iter()
173+
.map(|schema| schema.to_string())
174+
.collect::<Vec<_>>()
175+
.join(", ");
176+
177+
return Err(CollectionError::strict_mode(
178+
format!(
179+
"Index required but not found for \"{key}\" of one of the following types: [{possible_schemas_str}]",
180+
),
181+
"Create an index for this key or use a different expression.",
182+
));
183+
}
184+
}
185+
}
186+
Ok(())
187+
}
188+
130189
fn query_limit(&self) -> Option<usize> {
131190
Some(self.limit)
132191
}

lib/collection/src/tests/payload.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ async fn test_payload_missing_index_check() {
6969
shard
7070
.payload_index_schema
7171
.read()
72-
.one_unindexed_key(&geo_filter)
72+
.one_unindexed_filter_key(&geo_filter)
7373
.map(|(x, _)| x),
7474
Some(JsonPath::from_str("location").unwrap())
7575
);
@@ -88,7 +88,7 @@ async fn test_payload_missing_index_check() {
8888
shard
8989
.payload_index_schema
9090
.read()
91-
.one_unindexed_key(&geo_filter),
91+
.one_unindexed_filter_key(&geo_filter),
9292
None
9393
);
9494

@@ -111,7 +111,7 @@ async fn test_payload_missing_index_check() {
111111
shard
112112
.payload_index_schema
113113
.read()
114-
.one_unindexed_key(&num_filter)
114+
.one_unindexed_filter_key(&num_filter)
115115
.map(|(x, _)| x),
116116
Some("location.lat".parse().unwrap())
117117
);
@@ -130,7 +130,7 @@ async fn test_payload_missing_index_check() {
130130
shard
131131
.payload_index_schema
132132
.read()
133-
.one_unindexed_key(&num_filter),
133+
.one_unindexed_filter_key(&num_filter),
134134
None,
135135
);
136136

@@ -140,7 +140,7 @@ async fn test_payload_missing_index_check() {
140140
shard
141141
.payload_index_schema
142142
.read()
143-
.one_unindexed_key(&combined_filter),
143+
.one_unindexed_filter_key(&combined_filter),
144144
None,
145145
);
146146
}

0 commit comments

Comments
 (0)