@@ -57,6 +57,85 @@ message FeatureView {
5757 string cron = 1 ;
5858 }
5959
60+ // Configuration for vector indexing.
61+ message IndexConfig {
62+ // Configuration options for using brute force search. Declares no fields.
63+ message BruteForceConfig {}
64+
65+ // Configuration options for the tree-AH algorithm.
66+ message TreeAHConfig {
67+ // Optional. Number of embeddings on each leaf node. The default value is
68+ // 1000 if not set.
69+ optional int64 leaf_node_embedding_count = 1
70+ [(google.api.field_behavior ) = OPTIONAL ];
71+ }
72+
73+ // The distance measure used in nearest neighbor search.
74+ enum DistanceMeasureType {
75+ // Should not be set.
76+ DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0 ;
77+
78+ // Euclidean (L_2) distance; the value name indicates the squared form.
79+ SQUARED_L2_DISTANCE = 1 ;
80+
81+ // Cosine Distance. Defined as 1 - cosine similarity.
82+ //
83+ // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
84+ // of COSINE_DISTANCE. Our algorithms are better optimized for
85+ // DOT_PRODUCT_DISTANCE which, when combined with UNIT_L2_NORM, is
86+ // mathematically equivalent to COSINE_DISTANCE and results in the same
87+ // ranking.
88+ COSINE_DISTANCE = 2 ;
89+
90+ // Dot Product Distance. Defined as the negative of the dot product.
91+ DOT_PRODUCT_DISTANCE = 3 ;
92+ }
93+
94+ // The configuration with regard to the algorithms used for efficient
95+ // search.
96+ oneof algorithm_config {
97+ // Optional. Configuration options for the tree-AH algorithm (Shallow tree
98+ // + Asymmetric Hashing). Please refer to this paper for more details:
99+ // https://arxiv.org/abs/1908.10396
100+ TreeAHConfig tree_ah_config = 6 [(google.api.field_behavior ) = OPTIONAL ];
101+
102+ // Optional. Configuration options for using brute force search, which
103+ // simply implements the standard linear search in the database for each
104+ // query. It is primarily meant for benchmarking and to generate the
105+ // ground truth for approximate search.
106+ BruteForceConfig brute_force_config = 7
107+ [(google.api.field_behavior ) = OPTIONAL ];
108+ }
109+
110+ // Optional. Embedding column. This column contains the source data used to
111+ // create the index for vector search. embedding_column must be set when
112+ // using vector search.
113+ string embedding_column = 1 [(google.api.field_behavior ) = OPTIONAL ];
114+
115+ // Optional. Columns of features that are used to filter vector search
116+ // results.
117+ repeated string filter_columns = 2 [(google.api.field_behavior ) = OPTIONAL ];
118+
119+ // Optional. Crowding column. This column contains the crowding attribute,
120+ // which is a constraint on a neighbor list produced by
121+ // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1.FeatureOnlineStoreService.SearchNearestEntities]
122+ // to diversify search results. If
123+ // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
124+ // is set to K in
125+ // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1.SearchNearestEntitiesRequest],
126+ // it's guaranteed that no more than K entities of the same crowding
127+ // attribute are returned in the response.
128+ string crowding_column = 3 [(google.api.field_behavior ) = OPTIONAL ];
129+
130+ // Optional. The number of dimensions of the input embedding.
131+ optional int32 embedding_dimension = 4
132+ [(google.api.field_behavior ) = OPTIONAL ];
133+
134+ // Optional. The distance measure used in nearest neighbor search.
135+ DistanceMeasureType distance_measure_type = 5
136+ [(google.api.field_behavior ) = OPTIONAL ];
137+ }
138+
60139 // A Feature Registry source for features that need to be synced to Online
61140 // Store.
62141 message FeatureRegistrySource {
@@ -123,4 +202,10 @@ message FeatureView {
123202 // end of the sync the latest featureValues for each entityId of this
124203 // FeatureView are made ready for online serving.
125204 SyncConfig sync_config = 7 ;
205+
206+ // Optional. Configuration for index preparation for vector search. It
207+ // contains the required configurations to create an index from source data,
208+ // so that approximate nearest neighbor (a.k.a. ANN) search can be
209+ // performed during online serving.
210+ IndexConfig index_config = 15 [(google.api.field_behavior ) = OPTIONAL ];
126211}
0 commit comments