Skip to content

Commit 5b3b7d7

Browse files
committed
Add inference usage tracking to REST API
1 parent 5169bf2 commit 5b3b7d7

29 files changed

Lines changed: 631 additions & 349 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ tests/storage-compat/compatibility.tar
1616
tests/storage-compat/full-snapshot.snapshot.gz
1717
tests/storage-compat/storage.tar.bz2
1818
venv
19+
.env

lib/api/src/grpc/conversions.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2243,7 +2243,10 @@ impl From<PointsOperationResponseInternal> for PointsOperationResponse {
22432243
Self {
22442244
result: result.map(Into::into),
22452245
time,
2246-
usage: Some(Usage { hardware: usage }),
2246+
usage: Some(Usage {
2247+
hardware: usage,
2248+
inference: None,
2249+
}),
22472250
}
22482251
}
22492252
}

lib/api/src/grpc/ops.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::grpc::{HardwareUsage, Usage};
1+
use crate::grpc::{HardwareUsage, InferenceUsage, Usage};
22

33
impl HardwareUsage {
44
pub fn add(&mut self, other: Self) {
@@ -24,13 +24,22 @@ impl HardwareUsage {
2424

2525
impl Usage {
2626
pub fn is_empty(&self) -> bool {
27-
let Usage { hardware } = self;
27+
let Usage {
28+
hardware,
29+
inference,
30+
} = self;
2831

29-
hardware.is_none()
32+
hardware.is_none() && inference.is_none()
3033
}
3134
}
3235

33-
pub fn usage_or_none(hardware: Option<HardwareUsage>) -> Option<Usage> {
34-
let usage = Usage { hardware };
36+
pub fn usage_or_none(
37+
hardware: Option<HardwareUsage>,
38+
inference: Option<InferenceUsage>,
39+
) -> Option<Usage> {
40+
let usage = Usage {
41+
hardware,
42+
inference,
43+
};
3544
if usage.is_empty() { None } else { Some(usage) }
3645
}

lib/api/src/grpc/proto/points.proto

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,9 +1171,22 @@ message GeoPoint {
11711171
// ---------------------------------------------
11721172
message Usage {
11731173
optional HardwareUsage hardware = 1;
1174+
optional InferenceUsage inference = 2;
11741175
}
11751176

11761177

1178+
// ---------------------------------------------
1179+
// ------------ Inference measurements ----------
1180+
// ---------------------------------------------
1181+
1182+
message InferenceUsage {
1183+
map<string, ModelUsage> model = 1;
1184+
}
1185+
1186+
message ModelUsage {
1187+
uint64 tokens = 1;
1188+
}
1189+
11771190
// ---------------------------------------------
11781191
// ------------ Hardware measurements ----------
11791192
// ---------------------------------------------

lib/api/src/grpc/qdrant.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6573,6 +6573,22 @@ pub struct GeoPoint {
65736573
pub struct Usage {
65746574
#[prost(message, optional, tag = "1")]
65756575
pub hardware: ::core::option::Option<HardwareUsage>,
6576+
#[prost(message, optional, tag = "2")]
6577+
pub inference: ::core::option::Option<InferenceUsage>,
6578+
}
6579+
#[derive(serde::Serialize)]
6580+
#[allow(clippy::derive_partial_eq_without_eq)]
6581+
#[derive(Clone, PartialEq, ::prost::Message)]
6582+
pub struct InferenceUsage {
6583+
#[prost(map = "string, message", tag = "1")]
6584+
pub model: ::std::collections::HashMap<::prost::alloc::string::String, ModelUsage>,
6585+
}
6586+
#[derive(serde::Serialize)]
6587+
#[allow(clippy::derive_partial_eq_without_eq)]
6588+
#[derive(Clone, PartialEq, ::prost::Message)]
6589+
pub struct ModelUsage {
6590+
#[prost(uint64, tag = "1")]
6591+
pub tokens: u64,
65766592
}
65776593
#[derive(serde::Serialize)]
65786594
#[allow(clippy::derive_partial_eq_without_eq)]

lib/api/src/rest/conversions.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,25 @@ use super::{
77
FacetRequestInternal, FacetResponse, FacetValue, FacetValueHit, NearestQuery, OrderByInterface,
88
Query, QueryInterface, VectorOutput, VectorStructOutput,
99
};
10+
use crate::grpc::{InferenceUsage as GrpcInferenceUsage, ModelUsage};
11+
use crate::rest::models::InferenceUsage;
1012
use crate::rest::{DenseVector, NamedVectorStruct};
1113

14+
impl From<InferenceUsage> for GrpcInferenceUsage {
15+
fn from(value: InferenceUsage) -> Self {
16+
let mut grpc_usage = GrpcInferenceUsage::default();
17+
for (model, usage) in value.models {
18+
grpc_usage.model.insert(
19+
model,
20+
ModelUsage {
21+
tokens: usage.tokens,
22+
},
23+
);
24+
}
25+
grpc_usage
26+
}
27+
}
28+
1229
impl From<VectorInternal> for VectorOutput {
1330
fn from(value: VectorInternal) -> Self {
1431
match value {

lib/api/src/rest/models.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
use std::fmt::Debug;
22

3+
use ahash::HashMap;
34
use schemars::JsonSchema;
45
use segment::common::anonymize::Anonymize;
56
use serde;
6-
use serde::Serialize;
7+
use serde::{Deserialize, Serialize};
78

89
pub fn get_git_commit_id() -> Option<String> {
910
option_env!("GIT_COMMIT_ID")
@@ -55,12 +56,17 @@ pub struct ApiResponse<D> {
5556
pub struct Usage {
5657
#[serde(skip_serializing_if = "Option::is_none")]
5758
pub hardware: Option<HardwareUsage>,
59+
pub inference: Option<InferenceUsage>,
5860
}
5961

6062
impl Usage {
6163
pub fn is_empty(&self) -> bool {
62-
let Usage { hardware } = self;
63-
hardware.is_none()
64+
let Usage {
65+
hardware,
66+
inference,
67+
} = self;
68+
69+
hardware.is_none() && inference.is_none()
6470
}
6571
}
6672

@@ -82,6 +88,18 @@ pub struct HardwareUsage {
8288
pub vector_io_write: usize,
8389
}
8490

91+
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
92+
#[serde(rename_all = "snake_case")]
93+
pub struct InferenceUsage {
94+
pub models: HashMap<String, ModelUsage>,
95+
}
96+
97+
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
98+
#[serde(rename_all = "snake_case")]
99+
pub struct ModelUsage {
100+
pub tokens: u64,
101+
}
102+
85103
#[derive(Debug, Serialize, JsonSchema)]
86104
#[serde(rename_all = "snake_case")]
87105
pub struct CollectionDescription {

src/actix/api/collections_api.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ async fn create_collection(
118118
CreateCollectionOperation::new(collection.name.clone(), operation.into_inner());
119119

120120
let Ok(create_collection_op) = create_collection_op else {
121-
return process_response(create_collection_op, timing, None);
121+
return process_response(create_collection_op, timing, None, None);
122122
};
123123

124124
let response = dispatcher
@@ -128,7 +128,7 @@ async fn create_collection(
128128
query.timeout(),
129129
)
130130
.await;
131-
process_response(response, timing, None)
131+
process_response(response, timing, None, None)
132132
}
133133

134134
#[patch("/collections/{name}")]
@@ -151,7 +151,7 @@ async fn update_collection(
151151
query.timeout(),
152152
)
153153
.await;
154-
process_response(response, timing, None)
154+
process_response(response, timing, None, None)
155155
}
156156

157157
#[delete("/collections/{name}")]
@@ -171,7 +171,7 @@ async fn delete_collection(
171171
query.timeout(),
172172
)
173173
.await;
174-
process_response(response, timing, None)
174+
process_response(response, timing, None, None)
175175
}
176176

177177
#[post("/collections/aliases")]
@@ -189,7 +189,7 @@ async fn update_aliases(
189189
query.timeout(),
190190
)
191191
.await;
192-
process_response(response, timing, None)
192+
process_response(response, timing, None, None)
193193
}
194194

195195
#[get("/collections/{name}/cluster")]
@@ -227,7 +227,7 @@ async fn update_collection_cluster(
227227
wait_timeout,
228228
)
229229
.await;
230-
process_response(response, timing, None)
230+
process_response(response, timing, None, None)
231231
}
232232

233233
// Configure services

src/actix/api/count_api.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ async fn count_points(
3737
.await
3838
{
3939
Ok(pass) => pass,
40-
Err(err) => return process_response_error(err, Instant::now(), None),
40+
Err(err) => return process_response_error(err, Instant::now(), None, None),
4141
};
4242

4343
let shard_selector = match shard_key {
@@ -66,5 +66,5 @@ async fn count_points(
6666
)
6767
.await;
6868

69-
helpers::process_response(result, timing, request_hw_counter.to_rest_api())
69+
helpers::process_response(result, timing, request_hw_counter.to_rest_api(), None)
7070
}

src/actix/api/discovery_api.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ async fn discover_points(
4040
.await
4141
{
4242
Ok(pass) => pass,
43-
Err(err) => return process_response_error(err, Instant::now(), None),
43+
Err(err) => return process_response_error(err, Instant::now(), None, None),
4444
};
4545

4646
let shard_selection = match shard_key {
@@ -76,7 +76,7 @@ async fn discover_points(
7676
.collect_vec()
7777
});
7878

79-
helpers::process_response(result, timing, request_hw_counter.to_rest_api())
79+
helpers::process_response(result, timing, request_hw_counter.to_rest_api(), None)
8080
}
8181

8282
#[post("/collections/{name}/points/discover/batch")]
@@ -100,7 +100,7 @@ async fn discover_batch_points(
100100
.await
101101
{
102102
Ok(pass) => pass,
103-
Err(err) => return process_response_error(err, Instant::now(), None),
103+
Err(err) => return process_response_error(err, Instant::now(), None, None),
104104
};
105105

106106
let request_hw_counter = get_request_hardware_counter(
@@ -133,7 +133,7 @@ async fn discover_batch_points(
133133
.collect_vec()
134134
});
135135

136-
helpers::process_response(result, timing, request_hw_counter.to_rest_api())
136+
helpers::process_response(result, timing, request_hw_counter.to_rest_api(), None)
137137
}
138138

139139
pub fn config_discovery_api(cfg: &mut web::ServiceConfig) {

0 commit comments

Comments
 (0)