@@ -8,14 +8,13 @@ use http::{HeaderMap, HeaderValue, Method, Uri};
88use issues:: { Action , Code , ImmediateSolution , Issue , Solution } ;
99use itertools:: Itertools ;
1010use segment:: common:: operation_error:: OperationError ;
11- use segment:: data_types:: index:: { TextIndexParams , TextIndexType , TokenizerType } ;
1211use segment:: index:: query_optimization:: rescore_formula:: parsed_formula:: VariableId ;
1312use segment:: json_path:: JsonPath ;
1413use segment:: types:: {
1514 AnyVariants , Condition , FieldCondition , Filter , Match , MatchValue , PayloadFieldSchema ,
1615 PayloadKeyType , PayloadSchemaParams , PayloadSchemaType , RangeInterface , UuidPayloadType ,
1716} ;
18- use strum:: IntoEnumIterator as _;
17+ use strum:: { EnumIter , IntoEnumIterator as _} ;
1918
2019use crate :: operations:: universal_query:: formula:: ExpressionInternal ;
2120#[ derive( Debug ) ]
@@ -162,33 +161,33 @@ impl Issue for UnindexedField {
162161}
163162
164163/// Suggest any index, let user choose depending on their data type
165- fn all_indexes ( ) -> impl Iterator < Item = PayloadFieldSchema > {
166- PayloadSchemaType :: iter ( ) . map ( PayloadFieldSchema :: FieldType )
164+ fn all_indexes ( ) -> impl Iterator < Item = FieldIndexType > {
165+ FieldIndexType :: iter ( )
167166}
168167
169- fn infer_schema_from_match_value ( value : & MatchValue ) -> Vec < PayloadFieldSchema > {
168+ fn infer_index_from_match_value ( value : & MatchValue ) -> Vec < FieldIndexType > {
170169 match & value. value {
171170 segment:: types:: ValueVariants :: String ( string) => {
172171 let mut inferred = Vec :: new ( ) ;
173172
174173 if UuidPayloadType :: parse_str ( string) . is_ok ( ) {
175- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) )
174+ inferred. push ( FieldIndexType :: UuidMatch )
176175 }
177176
178- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Keyword ) ) ;
177+ inferred. push ( FieldIndexType :: KeywordMatch ) ;
179178
180179 inferred
181180 }
182181 segment:: types:: ValueVariants :: Integer ( _integer) => {
183- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ]
182+ vec ! [ FieldIndexType :: IntMatch ]
184183 }
185184 segment:: types:: ValueVariants :: Bool ( _boolean) => {
186- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Bool ) ]
185+ vec ! [ FieldIndexType :: BoolMatch ]
187186 }
188187 }
189188}
190189
191- fn infer_schema_from_any_variants ( value : & AnyVariants ) -> Vec < PayloadFieldSchema > {
190+ fn infer_index_from_any_variants ( value : & AnyVariants ) -> Vec < FieldIndexType > {
192191 match value {
193192 AnyVariants :: Strings ( strings) => {
194193 let mut inferred = Vec :: new ( ) ;
@@ -197,20 +196,20 @@ fn infer_schema_from_any_variants(value: &AnyVariants) -> Vec<PayloadFieldSchema
197196 . iter ( )
198197 . all ( |s| UuidPayloadType :: parse_str ( s) . is_ok ( ) )
199198 {
200- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) )
199+ inferred. push ( FieldIndexType :: UuidMatch )
201200 }
202201
203- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Keyword ) ) ;
202+ inferred. push ( FieldIndexType :: KeywordMatch ) ;
204203
205204 inferred
206205 }
207206 AnyVariants :: Integers ( _integers) => {
208- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ]
207+ vec ! [ FieldIndexType :: IntMatch ]
209208 }
210209 }
211210}
212211
213- fn infer_schema_from_field_condition ( field_condition : & FieldCondition ) -> Vec < PayloadFieldSchema > {
212+ fn infer_index_from_field_condition ( field_condition : & FieldCondition ) -> Vec < FieldIndexType > {
214213 let FieldCondition {
215214 key : _key,
216215 r#match,
@@ -223,47 +222,36 @@ fn infer_schema_from_field_condition(field_condition: &FieldCondition) -> Vec<Pa
223222 is_null,
224223 } = field_condition;
225224
226- let mut inferred = Vec :: new ( ) ;
225+ let mut required_indexes = Vec :: new ( ) ;
227226
228227 if let Some ( r#match) = r#match {
229- inferred. extend ( match r#match {
230- Match :: Value ( match_value) => infer_schema_from_match_value ( match_value) ,
231- Match :: Text ( _match_text) => {
232- vec ! [ PayloadFieldSchema :: FieldParams ( PayloadSchemaParams :: Text (
233- TextIndexParams {
234- r#type: TextIndexType :: Text ,
235- tokenizer: TokenizerType :: default ( ) ,
236- min_token_len: None ,
237- max_token_len: None ,
238- lowercase: None ,
239- on_disk: None ,
240- } ,
241- ) ) ]
242- }
243- Match :: Any ( match_any) => infer_schema_from_any_variants ( & match_any. any ) ,
244- Match :: Except ( match_except) => infer_schema_from_any_variants ( & match_except. except ) ,
228+ required_indexes. extend ( match r#match {
229+ Match :: Value ( match_value) => infer_index_from_match_value ( match_value) ,
230+ Match :: Text ( _match_text) => vec ! [ FieldIndexType :: Text ] ,
231+ Match :: Any ( match_any) => infer_index_from_any_variants ( & match_any. any ) ,
232+ Match :: Except ( match_except) => infer_index_from_any_variants ( & match_except. except ) ,
245233 } )
246234 }
247235 if let Some ( range_interface) = range {
248236 match range_interface {
249237 RangeInterface :: DateTime ( _) => {
250- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Datetime ) ) ;
238+ required_indexes . push ( FieldIndexType :: DatetimeRange ) ;
251239 }
252240 RangeInterface :: Float ( _) => {
253- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Float ) ) ;
254- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ) ;
241+ required_indexes . push ( FieldIndexType :: FloatRange ) ;
242+ required_indexes . push ( FieldIndexType :: IntRange ) ;
255243 }
256244 }
257245 }
258246 if geo_bounding_box. is_some ( ) || geo_radius. is_some ( ) || geo_polygon. is_some ( ) {
259- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Geo ) ) ;
247+ required_indexes . push ( FieldIndexType :: Geo ) ;
260248 }
261249 if values_count. is_some ( ) || is_empty. is_some ( ) || is_null. is_some ( ) {
262250 // Any index will do, let user choose depending on their data type
263- inferred . extend ( all_indexes ( ) ) ;
251+ required_indexes . extend ( all_indexes ( ) ) ;
264252 }
265253
266- inferred
254+ required_indexes
267255}
268256
269257pub struct IssueExtractor < ' a > {
@@ -359,12 +347,12 @@ impl<'a> Extractor<'a> {
359347
360348 fn update_from_condition ( & mut self , nested_prefix : Option < & JsonPath > , condition : & Condition ) {
361349 let key;
362- let inferred ;
350+ let required_index ;
363351
364352 match condition {
365353 Condition :: Field ( field_condition) => {
366354 key = & field_condition. key ;
367- inferred = infer_schema_from_field_condition ( field_condition) ;
355+ required_index = infer_index_from_field_condition ( field_condition) ;
368356 }
369357 Condition :: Filter ( filter) => {
370358 self . update_from_filter ( nested_prefix, filter) ;
@@ -377,14 +365,14 @@ impl<'a> Extractor<'a> {
377365 ) ;
378366 return ;
379367 }
380- // Any index will suffice
368+ // Any index will suffice to get the satellite null index
381369 Condition :: IsEmpty ( is_empty) => {
382370 key = & is_empty. is_empty . key ;
383- inferred = all_indexes ( ) . collect ( ) ;
371+ required_index = all_indexes ( ) . collect ( ) ;
384372 }
385373 Condition :: IsNull ( is_null) => {
386374 key = & is_null. is_null . key ;
387- inferred = all_indexes ( ) . collect ( ) ;
375+ required_index = all_indexes ( ) . collect ( ) ;
388376 }
389377 // No index needed
390378 Condition :: HasId ( _) => return ,
@@ -394,35 +382,26 @@ impl<'a> Extractor<'a> {
394382
395383 let full_key = JsonPath :: extend_or_new ( nested_prefix, key) ;
396384
397- if self . needs_index ( & full_key, & inferred) {
385+ if self . needs_index ( & full_key, & required_index) {
386+ let schemas = required_index
387+ . into_iter ( )
388+ . map ( PayloadSchemaType :: from)
389+ . map ( PayloadFieldSchema :: FieldType ) ;
398390 self . unindexed_schema
399391 . entry ( full_key)
400392 . or_default ( )
401- . extend ( inferred ) ;
393+ . extend ( schemas ) ;
402394 }
403395 }
404396
405- fn needs_index ( & self , key : & JsonPath , inferred : & [ PayloadFieldSchema ] ) -> bool {
397+ fn needs_index ( & self , key : & JsonPath , required_indexes : & [ FieldIndexType ] ) -> bool {
406398 match self . payload_schema . get ( key) {
407399 Some ( index_info) => {
408- let index_info_kind = index_info . kind ( ) ;
409-
410- let already_indexed = inferred
400+ // check if the index present has the right capabilities
401+ let index_field_types = schema_capabilities ( index_info ) ;
402+ let already_indexed = required_indexes
411403 . iter ( )
412- // TODO(strict-mode):
413- // Use better comparisons for parametrized indexes. An idea is to make the inferring step
414- // also output valid parametrized indexes and compare those instead of just the kind (index type)
415- //
416- // The only reason why it would be needed is because integer index can be parametrized
417- // with just lookup or just range, so it is possible to make a false negative here. E.g.
418- //
419- // condition: MatchValue
420- // inferred: FieldType(Integer)
421- // index_info: FieldParams(IntegerIndex(range))
422- //
423- // In this case, we would assume that the field is indexed correctly when it is not
424- . map ( PayloadFieldSchema :: kind)
425- . any ( |inferred| inferred == index_info_kind) ;
404+ . any ( |required| index_field_types. contains ( required) ) ;
426405
427406 !already_indexed
428407 }
@@ -432,7 +411,7 @@ impl<'a> Extractor<'a> {
432411
433412 pub fn update_from_expression ( & mut self , expression : & ExpressionInternal ) {
434413 let key;
435- let inferred ;
414+ let required_index ;
436415
437416 match expression {
438417 ExpressionInternal :: Constant ( _) => return ,
@@ -447,9 +426,10 @@ impl<'a> Extractor<'a> {
447426 VariableId :: Score ( _) => return ,
448427 VariableId :: Payload ( json_path) => {
449428 key = json_path;
450- inferred = vec ! [
451- PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ,
452- PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Float ) ,
429+ required_index = vec ! [
430+ FieldIndexType :: IntMatch ,
431+ FieldIndexType :: IntRange ,
432+ FieldIndexType :: FloatRange ,
453433 ] ;
454434 }
455435 VariableId :: Condition ( _) => return ,
@@ -461,12 +441,12 @@ impl<'a> Extractor<'a> {
461441 }
462442 ExpressionInternal :: GeoDistance { origin : _, to } => {
463443 key = to. clone ( ) ;
464- inferred = vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Geo ) ] ;
444+ required_index = vec ! [ FieldIndexType :: Geo ] ;
465445 }
466446 ExpressionInternal :: Datetime ( _) => return ,
467447 ExpressionInternal :: DatetimeKey ( variable) => {
468448 key = variable. clone ( ) ;
469- inferred = vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Datetime ) ] ;
449+ required_index = vec ! [ FieldIndexType :: DatetimeRange ] ;
470450 }
471451 ExpressionInternal :: Mult ( expression_internals) => {
472452 for expr in expression_internals {
@@ -533,11 +513,116 @@ impl<'a> Extractor<'a> {
533513 }
534514 }
535515
536- if self . needs_index ( & key, & inferred) {
516+ if self . needs_index ( & key, & required_index) {
517+ let schemas = required_index
518+ . into_iter ( )
519+ . map ( PayloadSchemaType :: from)
520+ . map ( PayloadFieldSchema :: FieldType ) ;
537521 self . unindexed_schema
538522 . entry ( key)
539523 . or_default ( )
540- . extend ( inferred ) ;
524+ . extend ( schemas ) ;
541525 }
542526 }
543527}
528+
529+ /// All types of internal indexes
530+ #[ derive( Debug , Eq , PartialEq , EnumIter , Hash ) ]
531+ enum FieldIndexType {
532+ IntMatch ,
533+ IntRange ,
534+ KeywordMatch ,
535+ FloatRange ,
536+ Text ,
537+ BoolMatch ,
538+ UuidMatch ,
539+ UuidRange ,
540+ DatetimeRange ,
541+ Geo ,
542+ }
543+
544+ fn schema_capabilities ( value : & PayloadFieldSchema ) -> HashSet < FieldIndexType > {
545+ let mut index_types = HashSet :: new ( ) ;
546+ match value {
547+ PayloadFieldSchema :: FieldType ( payload_schema_type) => match payload_schema_type {
548+ PayloadSchemaType :: Keyword => index_types. insert ( FieldIndexType :: KeywordMatch ) ,
549+ PayloadSchemaType :: Integer => {
550+ index_types. insert ( FieldIndexType :: IntMatch ) ;
551+ index_types. insert ( FieldIndexType :: IntRange )
552+ }
553+ PayloadSchemaType :: Uuid => {
554+ index_types. insert ( FieldIndexType :: UuidMatch ) ;
555+ index_types. insert ( FieldIndexType :: UuidRange )
556+ }
557+ PayloadSchemaType :: Bool => index_types. insert ( FieldIndexType :: BoolMatch ) ,
558+ PayloadSchemaType :: Float => index_types. insert ( FieldIndexType :: FloatRange ) ,
559+ PayloadSchemaType :: Geo => index_types. insert ( FieldIndexType :: Geo ) ,
560+ PayloadSchemaType :: Text => index_types. insert ( FieldIndexType :: Text ) ,
561+ PayloadSchemaType :: Datetime => index_types. insert ( FieldIndexType :: DatetimeRange ) ,
562+ } ,
563+ PayloadFieldSchema :: FieldParams ( payload_schema_params) => match payload_schema_params {
564+ PayloadSchemaParams :: Keyword ( _) => index_types. insert ( FieldIndexType :: KeywordMatch ) ,
565+ PayloadSchemaParams :: Integer ( integer_index_params) => {
566+ if integer_index_params. lookup == Some ( true ) {
567+ index_types. insert ( FieldIndexType :: IntMatch ) ;
568+ }
569+ if integer_index_params. range == Some ( true ) {
570+ index_types. insert ( FieldIndexType :: IntRange ) ;
571+ }
572+ debug_assert ! (
573+ !index_types. is_empty( ) ,
574+ "lookup or range must be true for Integer payload index"
575+ ) ;
576+ // unifying match arm types
577+ true
578+ }
579+ PayloadSchemaParams :: Uuid ( _) => {
580+ index_types. insert ( FieldIndexType :: UuidMatch ) ;
581+ index_types. insert ( FieldIndexType :: UuidRange )
582+ }
583+ PayloadSchemaParams :: Bool ( _) => index_types. insert ( FieldIndexType :: BoolMatch ) ,
584+ PayloadSchemaParams :: Float ( _) => index_types. insert ( FieldIndexType :: FloatRange ) ,
585+ PayloadSchemaParams :: Geo ( _) => index_types. insert ( FieldIndexType :: Geo ) ,
586+ PayloadSchemaParams :: Text ( _) => index_types. insert ( FieldIndexType :: Text ) ,
587+ PayloadSchemaParams :: Datetime ( _) => index_types. insert ( FieldIndexType :: DatetimeRange ) ,
588+ } ,
589+ } ;
590+ index_types
591+ }
592+
593+ impl From < FieldIndexType > for PayloadSchemaType {
594+ fn from ( val : FieldIndexType ) -> Self {
595+ match val {
596+ FieldIndexType :: IntMatch => PayloadSchemaType :: Integer ,
597+ FieldIndexType :: IntRange => PayloadSchemaType :: Integer ,
598+ FieldIndexType :: KeywordMatch => PayloadSchemaType :: Keyword ,
599+ FieldIndexType :: FloatRange => PayloadSchemaType :: Float ,
600+ FieldIndexType :: Text => PayloadSchemaType :: Text ,
601+ FieldIndexType :: BoolMatch => PayloadSchemaType :: Bool ,
602+ FieldIndexType :: UuidMatch => PayloadSchemaType :: Uuid ,
603+ FieldIndexType :: UuidRange => PayloadSchemaType :: Uuid ,
604+ FieldIndexType :: DatetimeRange => PayloadSchemaType :: Datetime ,
605+ FieldIndexType :: Geo => PayloadSchemaType :: Geo ,
606+ }
607+ }
608+ }
609+
610+ #[ cfg( test) ]
611+ mod tests {
612+ use segment:: data_types:: index:: IntegerIndexParams ;
613+
614+ use super :: * ;
615+
616+ #[ test]
617+ fn integer_index_capacities ( ) {
618+ let params = PayloadSchemaParams :: Integer ( IntegerIndexParams {
619+ lookup : Some ( true ) ,
620+ range : Some ( true ) ,
621+ ..Default :: default ( )
622+ } ) ;
623+ let schema = PayloadFieldSchema :: FieldParams ( params) ;
624+ let index_types = schema_capabilities ( & schema) ;
625+ assert ! ( index_types. contains( & FieldIndexType :: IntMatch ) ) ;
626+ assert ! ( index_types. contains( & FieldIndexType :: IntRange ) ) ;
627+ }
628+ }
0 commit comments