Skip to content

Commit 0f109ff

Browse files
committed
Enhance InferenceUsage handling and error processing
1 parent 7a0907a commit 0f109ff

6 files changed

Lines changed: 31 additions & 8 deletions

File tree

lib/api/src/rest/conversions.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ use crate::rest::{DenseVector, NamedVectorStruct};
1515

1616
impl From<InferenceUsage> for grpc::InferenceUsage {
1717
fn from(value: InferenceUsage) -> Self {
18-
let mut grpc_usage_models = HashMap::default();
18+
let mut grpc_usage_models = HashMap::with_capacity(value.models.len());
1919
for (model, usage) in value.models {
2020
grpc_usage_models.insert(
2121
model,

lib/api/src/rest/models.rs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,18 @@ impl Usage {
7070
}
7171
}
7272

73+
impl InferenceUsage {
74+
/// Folds `other` into `self` and returns the combined usage.
///
/// For every `(model_name, usage)` entry in `other.models`:
/// - if `self.models` already contains `model_name`, `other`'s `tokens`
///   are added to the existing entry;
/// - otherwise `other`'s entry is inserted as-is.
///
/// Both `self` and `other` are taken by value; the accumulated `self` is returned.
pub fn merge(mut self, other: InferenceUsage) -> InferenceUsage {
75+
for (model_name, other_usage) in other.models {
76+
self.models
77+
.entry(model_name)
78+
// Only `tokens` is summed for a model present on both sides.
// NOTE(review): if the per-model usage type has other fields, the
// existing entry keeps its own values for them — confirm that is intended.
.and_modify(|usage| usage.tokens += other_usage.tokens)
79+
.or_insert(other_usage);
80+
}
81+
self
82+
}
83+
}
84+
7385
fn is_usage_none_or_empty(u: &Option<Usage>) -> bool {
7486
u.as_ref().is_none_or(|usage| usage.is_empty())
7587
}

lib/collection/src/operations/verification/query.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,12 @@ impl StrictModeVerification for CollectionQueryRequestWithUsage {
204204
// check query can perform fullscan when not rescoring
205205
if self.request.prefetch.is_empty() {
206206
query
207-
.check_fullscan(&self.request.using, collection, strict_mode_config)
207+
.check_fullscan(
208+
&self.request.using,
209+
self.request.filter.as_ref(),
210+
collection,
211+
strict_mode_config,
212+
)
208213
.await?;
209214
}
210215
// check for unindexed fields in formula

src/actix/api/snapshot_api.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -616,7 +616,7 @@ async fn recover_partial_snapshot(
616616
let recovery_lock = match try_take_recovery_lock_future.await {
617617
Ok(recovery_lock) => recovery_lock,
618618
Err(err) => {
619-
return helpers::process_response_error(err, tokio::time::Instant::now(), None);
619+
return helpers::process_response_error(err, tokio::time::Instant::now(), None, None);
620620
}
621621
};
622622

@@ -699,7 +699,7 @@ async fn recover_partial_snapshot_from(
699699
let recovery_lock = match try_take_recovery_lock_future.await {
700700
Ok(recovery_lock) => recovery_lock,
701701
Err(err) => {
702-
return helpers::process_response_error(err, tokio::time::Instant::now(), None);
702+
return helpers::process_response_error(err, tokio::time::Instant::now(), None, None);
703703
}
704704
};
705705

src/common/inference/update_requests.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,11 @@ pub async fn convert_batch(
179179
for (name, vecs) in named {
180180
let (converted_vectors, batch_usage) =
181181
convert_vectors(vecs, InferenceType::Update, inference_token.clone()).await?;
182-
if inference_usage.is_none() && batch_usage.is_some() {
183-
inference_usage = batch_usage;
182+
if let Some(batch_usage) = batch_usage {
183+
inference_usage = match inference_usage.take() {
184+
Some(accum) => Some(accum.merge(batch_usage)),
185+
None => Some(batch_usage),
186+
};
184187
}
185188
named_vectors.insert(name, converted_vectors);
186189
}

src/common/update.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -692,8 +692,11 @@ pub async fn do_batch_update_points(
692692
};
693693

694694
results.push(current_update_result);
695-
if inference_usage.is_none() && current_operation_usage_opt.is_some() {
696-
inference_usage = current_operation_usage_opt;
695+
if let Some(current_usage) = current_operation_usage_opt {
696+
inference_usage = match inference_usage {
697+
Some(accum) => Some(accum.merge(current_usage)),
698+
None => Some(current_usage),
699+
};
697700
}
698701
}
699702
Ok((results, inference_usage))

0 commit comments

Comments
 (0)