@@ -8,14 +8,14 @@ use http::{HeaderMap, HeaderValue, Method, Uri};
88use issues:: { Action , Code , ImmediateSolution , Issue , Solution } ;
99use itertools:: Itertools ;
1010use segment:: common:: operation_error:: OperationError ;
11- use segment:: data_types:: index:: { TextIndexParams , TextIndexType , TokenizerType } ;
11+ use segment:: data_types:: index:: IntegerIndexParams ;
1212use segment:: index:: query_optimization:: rescore_formula:: parsed_formula:: VariableId ;
1313use segment:: json_path:: JsonPath ;
1414use segment:: types:: {
1515 AnyVariants , Condition , FieldCondition , Filter , Match , MatchValue , PayloadFieldSchema ,
1616 PayloadKeyType , PayloadSchemaParams , PayloadSchemaType , RangeInterface , UuidPayloadType ,
1717} ;
18- use strum:: IntoEnumIterator as _;
18+ use strum:: { EnumIter , IntoEnumIterator as _} ;
1919
2020use crate :: operations:: universal_query:: formula:: ExpressionInternal ;
2121#[ derive( Debug ) ]
@@ -162,33 +162,33 @@ impl Issue for UnindexedField {
162162}
163163
164164/// Suggest any index, let user choose depending on their data type
165- fn all_indexes ( ) -> impl Iterator < Item = PayloadFieldSchema > {
166- PayloadSchemaType :: iter ( ) . map ( PayloadFieldSchema :: FieldType )
165+ fn all_indexes ( ) -> impl Iterator < Item = FieldIndexType > {
166+ FieldIndexType :: iter ( )
167167}
168168
169- fn infer_schema_from_match_value ( value : & MatchValue ) -> Vec < PayloadFieldSchema > {
169+ fn infer_index_from_match_value ( value : & MatchValue ) -> Vec < FieldIndexType > {
170170 match & value. value {
171171 segment:: types:: ValueVariants :: String ( string) => {
172172 let mut inferred = Vec :: new ( ) ;
173173
174174 if UuidPayloadType :: parse_str ( string) . is_ok ( ) {
175- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) )
175+ inferred. push ( FieldIndexType :: UuidMatch )
176176 }
177177
178- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Keyword ) ) ;
178+ inferred. push ( FieldIndexType :: KeywordMatch ) ;
179179
180180 inferred
181181 }
182182 segment:: types:: ValueVariants :: Integer ( _integer) => {
183- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ]
183+ vec ! [ FieldIndexType :: IntMatch ]
184184 }
185185 segment:: types:: ValueVariants :: Bool ( _boolean) => {
186- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Bool ) ]
186+ vec ! [ FieldIndexType :: BoolMatch ]
187187 }
188188 }
189189}
190190
191- fn infer_schema_from_any_variants ( value : & AnyVariants ) -> Vec < PayloadFieldSchema > {
191+ fn infer_index_from_any_variants ( value : & AnyVariants ) -> Vec < FieldIndexType > {
192192 match value {
193193 AnyVariants :: Strings ( strings) => {
194194 let mut inferred = Vec :: new ( ) ;
@@ -197,20 +197,20 @@ fn infer_schema_from_any_variants(value: &AnyVariants) -> Vec<PayloadFieldSchema
197197 . iter ( )
198198 . all ( |s| UuidPayloadType :: parse_str ( s) . is_ok ( ) )
199199 {
200- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) )
200+ inferred. push ( FieldIndexType :: UuidMatch )
201201 }
202202
203- inferred. push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Keyword ) ) ;
203+ inferred. push ( FieldIndexType :: KeywordMatch ) ;
204204
205205 inferred
206206 }
207207 AnyVariants :: Integers ( _integers) => {
208- vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ]
208+ vec ! [ FieldIndexType :: IntMatch ]
209209 }
210210 }
211211}
212212
213- fn infer_schema_from_field_condition ( field_condition : & FieldCondition ) -> Vec < PayloadFieldSchema > {
213+ fn infer_index_from_field_condition ( field_condition : & FieldCondition ) -> Vec < FieldIndexType > {
214214 let FieldCondition {
215215 key : _key,
216216 r#match,
@@ -223,47 +223,36 @@ fn infer_schema_from_field_condition(field_condition: &FieldCondition) -> Vec<Pa
223223 is_null,
224224 } = field_condition;
225225
226- let mut inferred = Vec :: new ( ) ;
226+ let mut required_indexes = Vec :: new ( ) ;
227227
228228 if let Some ( r#match) = r#match {
229- inferred. extend ( match r#match {
230- Match :: Value ( match_value) => infer_schema_from_match_value ( match_value) ,
231- Match :: Text ( _match_text) => {
232- vec ! [ PayloadFieldSchema :: FieldParams ( PayloadSchemaParams :: Text (
233- TextIndexParams {
234- r#type: TextIndexType :: Text ,
235- tokenizer: TokenizerType :: default ( ) ,
236- min_token_len: None ,
237- max_token_len: None ,
238- lowercase: None ,
239- on_disk: None ,
240- } ,
241- ) ) ]
242- }
243- Match :: Any ( match_any) => infer_schema_from_any_variants ( & match_any. any ) ,
244- Match :: Except ( match_except) => infer_schema_from_any_variants ( & match_except. except ) ,
229+ required_indexes. extend ( match r#match {
230+ Match :: Value ( match_value) => infer_index_from_match_value ( match_value) ,
231+ Match :: Text ( _match_text) => vec ! [ FieldIndexType :: Text ] ,
232+ Match :: Any ( match_any) => infer_index_from_any_variants ( & match_any. any ) ,
233+ Match :: Except ( match_except) => infer_index_from_any_variants ( & match_except. except ) ,
245234 } )
246235 }
247236 if let Some ( range_interface) = range {
248237 match range_interface {
249238 RangeInterface :: DateTime ( _) => {
250- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Datetime ) ) ;
239+ required_indexes . push ( FieldIndexType :: DatetimeRange ) ;
251240 }
252241 RangeInterface :: Float ( _) => {
253- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Float ) ) ;
254- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ) ;
242+ required_indexes . push ( FieldIndexType :: FloatRange ) ;
243+ required_indexes . push ( FieldIndexType :: IntRange ) ;
255244 }
256245 }
257246 }
258247 if geo_bounding_box. is_some ( ) || geo_radius. is_some ( ) || geo_polygon. is_some ( ) {
259- inferred . push ( PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Geo ) ) ;
248+ required_indexes . push ( FieldIndexType :: Geo ) ;
260249 }
261250 if values_count. is_some ( ) || is_empty. is_some ( ) || is_null. is_some ( ) {
262251 // Any index will do, let user choose depending on their data type
263- inferred . extend ( all_indexes ( ) ) ;
252+ required_indexes . extend ( all_indexes ( ) ) ;
264253 }
265254
266- inferred
255+ required_indexes
267256}
268257
269258pub struct IssueExtractor < ' a > {
@@ -309,8 +298,6 @@ impl<'a> IssueExtractor<'a> {
309298pub struct Extractor < ' a > {
310299 payload_schema : & ' a HashMap < PayloadKeyType , PayloadFieldSchema > ,
311300 unindexed_schema : HashMap < PayloadKeyType , Vec < PayloadFieldSchema > > ,
312- needs_match_index : bool ,
313- needs_range_index : bool ,
314301}
315302
316303impl < ' a > Extractor < ' a > {
@@ -322,8 +309,6 @@ impl<'a> Extractor<'a> {
322309 let mut extractor = Self {
323310 payload_schema,
324311 unindexed_schema : HashMap :: new ( ) ,
325- needs_match_index : false ,
326- needs_range_index : false ,
327312 } ;
328313
329314 extractor. update_from_filter ( None , filter) ;
@@ -336,8 +321,6 @@ impl<'a> Extractor<'a> {
336321 Self {
337322 payload_schema,
338323 unindexed_schema : HashMap :: new ( ) ,
339- needs_match_index : false ,
340- needs_range_index : false ,
341324 }
342325 }
343326
@@ -365,14 +348,12 @@ impl<'a> Extractor<'a> {
365348
366349 fn update_from_condition ( & mut self , nested_prefix : Option < & JsonPath > , condition : & Condition ) {
367350 let key;
368- let inferred ;
351+ let required_index ;
369352
370353 match condition {
371354 Condition :: Field ( field_condition) => {
372355 key = & field_condition. key ;
373- self . update_needs_match ( field_condition. r#match . is_some ( ) ) ;
374- self . update_needs_range ( field_condition. range . is_some ( ) ) ;
375- inferred = infer_schema_from_field_condition ( field_condition) ;
356+ required_index = infer_index_from_field_condition ( field_condition) ;
376357 }
377358 Condition :: Filter ( filter) => {
378359 self . update_from_filter ( nested_prefix, filter) ;
@@ -388,11 +369,11 @@ impl<'a> Extractor<'a> {
388369 // Any index will suffice
389370 Condition :: IsEmpty ( is_empty) => {
390371 key = & is_empty. is_empty . key ;
391- inferred = all_indexes ( ) . collect ( ) ;
372+ required_index = all_indexes ( ) . collect ( ) ;
392373 }
393374 Condition :: IsNull ( is_null) => {
394375 key = & is_null. is_null . key ;
395- inferred = all_indexes ( ) . collect ( ) ;
376+ required_index = all_indexes ( ) . collect ( ) ;
396377 }
397378 // No index needed
398379 Condition :: HasId ( _) => return ,
@@ -402,46 +383,23 @@ impl<'a> Extractor<'a> {
402383
403384 let full_key = JsonPath :: extend_or_new ( nested_prefix, key) ;
404385
405- if self . needs_index ( & full_key, & inferred) {
386+ if self . needs_index ( & full_key, & required_index) {
387+ let schemas = required_index. into_iter ( ) . map ( PayloadFieldSchema :: from) ;
406388 self . unindexed_schema
407389 . entry ( full_key)
408390 . or_default ( )
409- . extend ( inferred ) ;
391+ . extend ( schemas ) ;
410392 }
411393 }
412394
413- /// Capture that an index with Match capability is required.
414- /// Does not toggle off if has been set once already.
415- fn update_needs_match ( & mut self , is_match : bool ) {
416- if !self . needs_match_index {
417- self . needs_match_index = is_match
418- }
419- }
420-
421- /// Capture that an index with Match capability is required.
422- /// Does not toggle off if has been set once already.
423- fn update_needs_range ( & mut self , is_range : bool ) {
424- if !self . needs_range_index {
425- self . needs_range_index = is_range
426- }
427- }
428-
429- fn needs_index ( & self , key : & JsonPath , inferred : & [ PayloadFieldSchema ] ) -> bool {
395+ fn needs_index ( & self , key : & JsonPath , required_indexes : & [ FieldIndexType ] ) -> bool {
430396 match self . payload_schema . get ( key) {
431397 Some ( index_info) => {
432- if self . needs_match_index && !index_info. supports_match ( ) {
433- return true ;
434- }
435-
436- if self . needs_range_index && !index_info. supports_range ( ) {
437- return true ;
438- }
439-
440- let index_info_kind = index_info. kind ( ) ;
441- let already_indexed = inferred
398+ // check if the index present has the right capabilities
399+ let index_field_types = schema_capacities ( index_info) ;
400+ let already_indexed = required_indexes
442401 . iter ( )
443- . map ( PayloadFieldSchema :: kind)
444- . any ( |inferred| inferred == index_info_kind) ;
402+ . any ( |required| index_field_types. contains ( required) ) ;
445403
446404 !already_indexed
447405 }
@@ -451,7 +409,7 @@ impl<'a> Extractor<'a> {
451409
452410 pub fn update_from_expression ( & mut self , expression : & ExpressionInternal ) {
453411 let key;
454- let inferred ;
412+ let required_index ;
455413
456414 match expression {
457415 ExpressionInternal :: Constant ( _) => return ,
@@ -466,10 +424,7 @@ impl<'a> Extractor<'a> {
466424 VariableId :: Score ( _) => return ,
467425 VariableId :: Payload ( json_path) => {
468426 key = json_path;
469- inferred = vec ! [
470- PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Integer ) ,
471- PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Float ) ,
472- ] ;
427+ required_index = vec ! [ FieldIndexType :: IntMatch , FieldIndexType :: FloatRange ] ;
473428 }
474429 VariableId :: Condition ( _) => return ,
475430 }
@@ -480,12 +435,12 @@ impl<'a> Extractor<'a> {
480435 }
481436 ExpressionInternal :: GeoDistance { origin : _, to } => {
482437 key = to. clone ( ) ;
483- inferred = vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Geo ) ] ;
438+ required_index = vec ! [ FieldIndexType :: Geo ] ;
484439 }
485440 ExpressionInternal :: Datetime ( _) => return ,
486441 ExpressionInternal :: DatetimeKey ( variable) => {
487442 key = variable. clone ( ) ;
488- inferred = vec ! [ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Datetime ) ] ;
443+ required_index = vec ! [ FieldIndexType :: DatetimeRange ] ;
489444 }
490445 ExpressionInternal :: Mult ( expression_internals) => {
491446 for expr in expression_internals {
@@ -552,11 +507,100 @@ impl<'a> Extractor<'a> {
552507 }
553508 }
554509
555- if self . needs_index ( & key, & inferred) {
510+ if self . needs_index ( & key, & required_index) {
511+ let schemas = required_index. into_iter ( ) . map ( PayloadFieldSchema :: from) ;
556512 self . unindexed_schema
557513 . entry ( key)
558514 . or_default ( )
559- . extend ( inferred) ;
515+ . extend ( schemas) ;
516+ }
517+ }
518+ }
519+
520+ /// All types of internal indexes
521+ #[ derive( Debug , Eq , PartialEq , EnumIter , Hash ) ]
522+ enum FieldIndexType {
523+ IntMatch ,
524+ IntRange ,
525+ KeywordMatch ,
526+ FloatRange ,
527+ Text ,
528+ BoolMatch ,
529+ UuidMatch ,
530+ UuidRange ,
531+ DatetimeRange ,
532+ Geo ,
533+ }
534+
535+ fn schema_capacities ( value : & PayloadFieldSchema ) -> HashSet < FieldIndexType > {
536+ let mut index_types = HashSet :: new ( ) ;
537+ match value {
538+ PayloadFieldSchema :: FieldType ( payload_schema_type) => match payload_schema_type {
539+ PayloadSchemaType :: Keyword => index_types. insert ( FieldIndexType :: KeywordMatch ) ,
540+ PayloadSchemaType :: Integer => index_types. insert ( FieldIndexType :: IntMatch ) ,
541+ PayloadSchemaType :: Uuid => index_types. insert ( FieldIndexType :: UuidMatch ) ,
542+ PayloadSchemaType :: Bool => index_types. insert ( FieldIndexType :: BoolMatch ) ,
543+ PayloadSchemaType :: Float => index_types. insert ( FieldIndexType :: FloatRange ) ,
544+ PayloadSchemaType :: Geo => index_types. insert ( FieldIndexType :: Geo ) ,
545+ PayloadSchemaType :: Text => index_types. insert ( FieldIndexType :: Text ) ,
546+ PayloadSchemaType :: Datetime => index_types. insert ( FieldIndexType :: DatetimeRange ) ,
547+ } ,
548+ PayloadFieldSchema :: FieldParams ( payload_schema_params) => match payload_schema_params {
549+ PayloadSchemaParams :: Keyword ( _) => index_types. insert ( FieldIndexType :: KeywordMatch ) ,
550+ PayloadSchemaParams :: Integer ( integer_index_params) => {
551+ if integer_index_params. lookup == Some ( true ) {
552+ index_types. insert ( FieldIndexType :: IntMatch ) ;
553+ }
554+ if integer_index_params. range == Some ( true ) {
555+ index_types. insert ( FieldIndexType :: IntRange ) ;
556+ }
557+ debug_assert ! (
558+ !index_types. is_empty( ) ,
559+ "lookup or range must be true for Integer payload index"
560+ ) ;
561+ // unifying match arm types
562+ true
563+ }
564+ PayloadSchemaParams :: Uuid ( _) => index_types. insert ( FieldIndexType :: UuidMatch ) ,
565+ PayloadSchemaParams :: Bool ( _) => index_types. insert ( FieldIndexType :: BoolMatch ) ,
566+ PayloadSchemaParams :: Float ( _) => index_types. insert ( FieldIndexType :: FloatRange ) ,
567+ PayloadSchemaParams :: Geo ( _) => index_types. insert ( FieldIndexType :: Geo ) ,
568+ PayloadSchemaParams :: Text ( _) => index_types. insert ( FieldIndexType :: Text ) ,
569+ PayloadSchemaParams :: Datetime ( _) => index_types. insert ( FieldIndexType :: DatetimeRange ) ,
570+ } ,
571+ } ;
572+ index_types
573+ }
574+
575+ impl From < FieldIndexType > for PayloadFieldSchema {
576+ fn from ( val : FieldIndexType ) -> Self {
577+ match val {
578+ FieldIndexType :: IntMatch => {
579+ let params = IntegerIndexParams {
580+ lookup : Some ( true ) ,
581+ ..Default :: default ( )
582+ } ;
583+ PayloadFieldSchema :: FieldParams ( PayloadSchemaParams :: Integer ( params) )
584+ }
585+ FieldIndexType :: IntRange => {
586+ let params = IntegerIndexParams {
587+ range : Some ( true ) ,
588+ ..Default :: default ( )
589+ } ;
590+ PayloadFieldSchema :: FieldParams ( PayloadSchemaParams :: Integer ( params) )
591+ }
592+ FieldIndexType :: KeywordMatch => {
593+ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Keyword )
594+ }
595+ FieldIndexType :: FloatRange => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Float ) ,
596+ FieldIndexType :: Text => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Text ) ,
597+ FieldIndexType :: BoolMatch => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Bool ) ,
598+ FieldIndexType :: UuidMatch => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) ,
599+ FieldIndexType :: UuidRange => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Uuid ) ,
600+ FieldIndexType :: DatetimeRange => {
601+ PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Datetime )
602+ }
603+ FieldIndexType :: Geo => PayloadFieldSchema :: FieldType ( PayloadSchemaType :: Geo ) ,
560604 }
561605 }
562606}
0 commit comments