Skip to content

Commit 160becb

Browse files
authored
Remove vectors_count (#7244)
* Remove deprecated vectors count from collection info
* Remove vectors count from shard info
* Update OpenAPI and gRPC spec
* Remove vectors count from example
1 parent 04d1c19 commit 160becb

9 files changed

Lines changed: 2 additions & 44 deletions

File tree

docs/grpc/docs.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,6 @@
603603
| ----- | ---- | ----- | ----------- |
604604
| status | [CollectionStatus](#qdrant-CollectionStatus) | | operating condition of the collection |
605605
| optimizer_status | [OptimizerStatus](#qdrant-OptimizerStatus) | | status of collection optimizers |
606-
| vectors_count | [uint64](#uint64) | optional | Approximate number of vectors in the collection |
607606
| segments_count | [uint64](#uint64) | | Number of independent segments |
608607
| config | [CollectionConfig](#qdrant-CollectionConfig) | | Configuration |
609608
| payload_schema | [CollectionInfo.PayloadSchemaEntry](#qdrant-CollectionInfo-PayloadSchemaEntry) | repeated | Collection data types |

docs/redoc/master/openapi.json

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6440,7 +6440,7 @@
64406440
],
64416441
"info": {
64426442
"title": "Qdrant API",
6443-
"description": "API description for Qdrant vector search engine.\n\nThis document describes CRUD and search operations on collections of points (vectors with payload).\n\nQdrant supports any combinations of `should`, `min_should`, `must` and `must_not` conditions, which makes it possible to use in applications when object could not be described solely by vector. It could be location features, availability flags, and other custom properties businesses should take into account.\n## Examples\nThis examples cover the most basic use-cases - collection creation and basic vector search.\n### Create collection\nFirst - let's create a collection with dot-production metric.\n```\ncurl -X PUT 'http://localhost:6333/collections/test_collection' \\\n -H 'Content-Type: application/json' \\\n --data-raw '{\n \"vectors\": {\n \"size\": 4,\n \"distance\": \"Dot\"\n }\n }'\n\n```\nExpected response:\n```\n{\n \"result\": true,\n \"status\": \"ok\",\n \"time\": 0.031095451\n}\n```\nWe can ensure that collection was created:\n```\ncurl 'http://localhost:6333/collections/test_collection'\n```\nExpected response:\n```\n{\n \"result\": {\n \"status\": \"green\",\n \"vectors_count\": 0,\n \"segments_count\": 5,\n \"disk_data_size\": 0,\n \"ram_data_size\": 0,\n \"config\": {\n \"params\": {\n \"vectors\": {\n \"size\": 4,\n \"distance\": \"Dot\"\n }\n },\n \"hnsw_config\": {\n \"m\": 16,\n \"ef_construct\": 100,\n \"full_scan_threshold\": 10000\n },\n \"optimizer_config\": {\n \"deleted_threshold\": 0.2,\n \"vacuum_min_vector_number\": 1000,\n \"default_segment_number\": 2,\n \"max_segment_size\": null,\n \"memmap_threshold\": null,\n \"indexing_threshold\": 20000,\n \"flush_interval_sec\": 5,\n \"max_optimization_threads\": null\n },\n \"wal_config\": {\n \"wal_capacity_mb\": 32,\n \"wal_segments_ahead\": 0\n }\n }\n },\n \"status\": \"ok\",\n \"time\": 2.1199e-05\n}\n```\n\n### Add points\nLet's now add vectors with some payload:\n```\ncurl -L -X PUT 
'http://localhost:6333/collections/test_collection/points?wait=true' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"points\": [\n {\"id\": 1, \"vector\": [0.05, 0.61, 0.76, 0.74], \"payload\": {\"city\": \"Berlin\"}},\n {\"id\": 2, \"vector\": [0.19, 0.81, 0.75, 0.11], \"payload\": {\"city\": [\"Berlin\", \"London\"] }},\n {\"id\": 3, \"vector\": [0.36, 0.55, 0.47, 0.94], \"payload\": {\"city\": [\"Berlin\", \"Moscow\"] }},\n {\"id\": 4, \"vector\": [0.18, 0.01, 0.85, 0.80], \"payload\": {\"city\": [\"London\", \"Moscow\"] }},\n {\"id\": 5, \"vector\": [0.24, 0.18, 0.22, 0.44], \"payload\": {\"count\": [0]}},\n {\"id\": 6, \"vector\": [0.35, 0.08, 0.11, 0.44]}\n ]\n}'\n```\nExpected response:\n```\n{\n \"result\": {\n \"operation_id\": 0,\n \"status\": \"completed\"\n },\n \"status\": \"ok\",\n \"time\": 0.000206061\n}\n```\n### Search with filtering\nLet's start with a basic request:\n```\ncurl -L -X POST 'http://localhost:6333/collections/test_collection/points/search' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"vector\": [0.2,0.1,0.9,0.7],\n \"top\": 3\n}'\n```\nExpected response:\n```\n{\n \"result\": [\n { \"id\": 4, \"score\": 1.362, \"payload\": null, \"version\": 0 },\n { \"id\": 1, \"score\": 1.273, \"payload\": null, \"version\": 0 },\n { \"id\": 3, \"score\": 1.208, \"payload\": null, \"version\": 0 }\n ],\n \"status\": \"ok\",\n \"time\": 0.000055785\n}\n```\nBut result is different if we add a filter:\n```\ncurl -L -X POST 'http://localhost:6333/collections/test_collection/points/search' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"filter\": {\n \"should\": [\n {\n \"key\": \"city\",\n \"match\": {\n \"value\": \"London\"\n }\n }\n ]\n },\n \"vector\": [0.2, 0.1, 0.9, 0.7],\n \"top\": 3\n}'\n```\nExpected response:\n```\n{\n \"result\": [\n { \"id\": 4, \"score\": 1.362, \"payload\": null, \"version\": 0 },\n { \"id\": 2, \"score\": 0.871, \"payload\": null, \"version\": 0 }\n ],\n \"status\": \"ok\",\n 
\"time\": 0.000093972\n}\n```\n",
6443+
"description": "API description for Qdrant vector search engine.\n\nThis document describes CRUD and search operations on collections of points (vectors with payload).\n\nQdrant supports any combinations of `should`, `min_should`, `must` and `must_not` conditions, which makes it possible to use in applications when object could not be described solely by vector. It could be location features, availability flags, and other custom properties businesses should take into account.\n## Examples\nThis examples cover the most basic use-cases - collection creation and basic vector search.\n### Create collection\nFirst - let's create a collection with dot-production metric.\n```\ncurl -X PUT 'http://localhost:6333/collections/test_collection' \\\n -H 'Content-Type: application/json' \\\n --data-raw '{\n \"vectors\": {\n \"size\": 4,\n \"distance\": \"Dot\"\n }\n }'\n\n```\nExpected response:\n```\n{\n \"result\": true,\n \"status\": \"ok\",\n \"time\": 0.031095451\n}\n```\nWe can ensure that collection was created:\n```\ncurl 'http://localhost:6333/collections/test_collection'\n```\nExpected response:\n```\n{\n \"result\": {\n \"status\": \"green\",\n \"segments_count\": 5,\n \"disk_data_size\": 0,\n \"ram_data_size\": 0,\n \"config\": {\n \"params\": {\n \"vectors\": {\n \"size\": 4,\n \"distance\": \"Dot\"\n }\n },\n \"hnsw_config\": {\n \"m\": 16,\n \"ef_construct\": 100,\n \"full_scan_threshold\": 10000\n },\n \"optimizer_config\": {\n \"deleted_threshold\": 0.2,\n \"vacuum_min_vector_number\": 1000,\n \"default_segment_number\": 2,\n \"max_segment_size\": null,\n \"memmap_threshold\": null,\n \"indexing_threshold\": 20000,\n \"flush_interval_sec\": 5,\n \"max_optimization_threads\": null\n },\n \"wal_config\": {\n \"wal_capacity_mb\": 32,\n \"wal_segments_ahead\": 0\n }\n }\n },\n \"status\": \"ok\",\n \"time\": 2.1199e-05\n}\n```\n\n### Add points\nLet's now add vectors with some payload:\n```\ncurl -L -X PUT 
'http://localhost:6333/collections/test_collection/points?wait=true' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"points\": [\n {\"id\": 1, \"vector\": [0.05, 0.61, 0.76, 0.74], \"payload\": {\"city\": \"Berlin\"}},\n {\"id\": 2, \"vector\": [0.19, 0.81, 0.75, 0.11], \"payload\": {\"city\": [\"Berlin\", \"London\"] }},\n {\"id\": 3, \"vector\": [0.36, 0.55, 0.47, 0.94], \"payload\": {\"city\": [\"Berlin\", \"Moscow\"] }},\n {\"id\": 4, \"vector\": [0.18, 0.01, 0.85, 0.80], \"payload\": {\"city\": [\"London\", \"Moscow\"] }},\n {\"id\": 5, \"vector\": [0.24, 0.18, 0.22, 0.44], \"payload\": {\"count\": [0]}},\n {\"id\": 6, \"vector\": [0.35, 0.08, 0.11, 0.44]}\n ]\n}'\n```\nExpected response:\n```\n{\n \"result\": {\n \"operation_id\": 0,\n \"status\": \"completed\"\n },\n \"status\": \"ok\",\n \"time\": 0.000206061\n}\n```\n### Search with filtering\nLet's start with a basic request:\n```\ncurl -L -X POST 'http://localhost:6333/collections/test_collection/points/search' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"vector\": [0.2,0.1,0.9,0.7],\n \"top\": 3\n}'\n```\nExpected response:\n```\n{\n \"result\": [\n { \"id\": 4, \"score\": 1.362, \"payload\": null, \"version\": 0 },\n { \"id\": 1, \"score\": 1.273, \"payload\": null, \"version\": 0 },\n { \"id\": 3, \"score\": 1.208, \"payload\": null, \"version\": 0 }\n ],\n \"status\": \"ok\",\n \"time\": 0.000055785\n}\n```\nBut result is different if we add a filter:\n```\ncurl -L -X POST 'http://localhost:6333/collections/test_collection/points/search' \\ -H 'Content-Type: application/json' \\ --data-raw '{\n \"filter\": {\n \"should\": [\n {\n \"key\": \"city\",\n \"match\": {\n \"value\": \"London\"\n }\n }\n ]\n },\n \"vector\": [0.2, 0.1, 0.9, 0.7],\n \"top\": 3\n}'\n```\nExpected response:\n```\n{\n \"result\": [\n { \"id\": 4, \"score\": 1.362, \"payload\": null, \"version\": 0 },\n { \"id\": 2, \"score\": 0.871, \"payload\": null, \"version\": 0 }\n ],\n \"status\": \"ok\",\n 
\"time\": 0.000093972\n}\n```\n",
64446444
"contact": {
64456445
"email": "[email protected]"
64466446
},
@@ -6607,13 +6607,6 @@
66076607
"optimizer_status": {
66086608
"$ref": "#/components/schemas/OptimizersStatus"
66096609
},
6610-
"vectors_count": {
6611-
"description": "DEPRECATED: Approximate number of vectors in collection. All vectors in collection are available for querying. Calculated as `points_count x vectors_per_point`. Where `vectors_per_point` is a number of named vectors in schema.",
6612-
"type": "integer",
6613-
"format": "uint",
6614-
"minimum": 0,
6615-
"nullable": true
6616-
},
66176610
"indexed_vectors_count": {
66186611
"description": "Approximate number of indexed vectors in the collection. Indexed vectors in large segments are faster to query, as it is stored in a specialized vector index.",
66196612
"type": "integer",

lib/api/src/grpc/proto/collections.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ message PayloadSchemaInfo {
557557
message CollectionInfo {
558558
CollectionStatus status = 1; // operating condition of the collection
559559
OptimizerStatus optimizer_status = 2; // status of collection optimizers
560-
optional uint64 vectors_count = 3; // Approximate number of vectors in the collection
560+
reserved 3; // Deprecated
561561
uint64 segments_count = 4; // Number of independent segments
562562
reserved 5; // Deprecated
563563
reserved 6; // Deprecated

lib/api/src/grpc/qdrant.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,6 @@ pub struct CollectionInfo {
11751175
/// status of collection optimizers
11761176
#[prost(message, optional, tag = "2")]
11771177
pub optimizer_status: ::core::option::Option<OptimizerStatus>,
1178-
/// Approximate number of vectors in the collection
1179-
#[prost(uint64, optional, tag = "3")]
1180-
pub vectors_count: ::core::option::Option<u64>,
11811178
/// Number of independent segments
11821179
#[prost(uint64, tag = "4")]
11831180
pub segments_count: u64,

lib/collection/src/collection/collection_ops.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,6 @@ impl Collection {
318318
while let Some(response) = requests.try_next().await? {
319319
info.status = cmp::max(info.status, response.status);
320320
info.optimizer_status = cmp::max(info.optimizer_status, response.optimizer_status);
321-
info.vectors_count = info
322-
.vectors_count
323-
.zip(response.vectors_count)
324-
.map(|(a, b)| a + b);
325321
info.indexed_vectors_count = info
326322
.indexed_vectors_count
327323
.zip(response.indexed_vectors_count)
@@ -340,10 +336,6 @@ impl Collection {
340336
}
341337
}
342338

343-
// Do not display vectors count, as it is an approximate number
344-
// and many users are confused by its behavior
345-
info.vectors_count = None;
346-
347339
Ok(info)
348340
}
349341

lib/collection/src/operations/conversions.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,6 @@ impl From<CollectionInfo> for api::grpc::qdrant::CollectionInfo {
376376
let CollectionInfo {
377377
status,
378378
optimizer_status,
379-
vectors_count,
380379
indexed_vectors_count,
381380
points_count,
382381
segments_count,
@@ -443,7 +442,6 @@ impl From<CollectionInfo> for api::grpc::qdrant::CollectionInfo {
443442
api::grpc::qdrant::OptimizerStatus { ok: false, error }
444443
}
445444
}),
446-
vectors_count: vectors_count.map(|count| count as u64),
447445
indexed_vectors_count: indexed_vectors_count.map(|count| count as u64),
448446
points_count: points_count.map(|count| count as u64),
449447
segments_count: segments_count as u64,
@@ -837,7 +835,6 @@ impl TryFrom<api::grpc::qdrant::GetCollectionInfoResponse> for CollectionInfo {
837835
let api::grpc::qdrant::CollectionInfo {
838836
status,
839837
optimizer_status,
840-
vectors_count,
841838
indexed_vectors_count,
842839
points_count,
843840
segments_count,
@@ -858,7 +855,6 @@ impl TryFrom<api::grpc::qdrant::GetCollectionInfoResponse> for CollectionInfo {
858855
}
859856
}
860857
},
861-
vectors_count: vectors_count.map(|count| count as usize),
862858
indexed_vectors_count: indexed_vectors_count.map(|count| count as usize),
863859
points_count: points_count.map(|count| count as usize),
864860
segments_count: segments_count as usize,

lib/collection/src/operations/types.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,6 @@ pub struct CollectionInfo {
218218
pub status: CollectionStatus,
219219
/// Status of optimizers
220220
pub optimizer_status: OptimizersStatus,
221-
/// DEPRECATED:
222-
/// Approximate number of vectors in collection.
223-
/// All vectors in collection are available for querying.
224-
/// Calculated as `points_count x vectors_per_point`.
225-
/// Where `vectors_per_point` is a number of named vectors in schema.
226-
#[serde(skip_serializing_if = "Option::is_none")]
227-
pub vectors_count: Option<usize>,
228221
/// Approximate number of indexed vectors in the collection.
229222
/// Indexed vectors in large segments are faster to query,
230223
/// as it is stored in a specialized vector index.
@@ -246,7 +239,6 @@ impl CollectionInfo {
246239
Self {
247240
status: CollectionStatus::Green,
248241
optimizer_status: OptimizersStatus::Ok,
249-
vectors_count: Some(0),
250242
indexed_vectors_count: Some(0),
251243
points_count: Some(0),
252244
segments_count: 0,
@@ -261,7 +253,6 @@ impl From<ShardInfoInternal> for CollectionInfo {
261253
let ShardInfoInternal {
262254
status,
263255
optimizer_status,
264-
vectors_count,
265256
indexed_vectors_count,
266257
points_count,
267258
segments_count,
@@ -271,7 +262,6 @@ impl From<ShardInfoInternal> for CollectionInfo {
271262
Self {
272263
status: status.into(),
273264
optimizer_status,
274-
vectors_count: Some(vectors_count),
275265
indexed_vectors_count: Some(indexed_vectors_count),
276266
points_count: Some(points_count),
277267
segments_count,
@@ -288,11 +278,6 @@ pub struct ShardInfoInternal {
288278
pub status: ShardStatus,
289279
/// Status of optimizers
290280
pub optimizer_status: OptimizersStatus,
291-
/// Approximate number of vectors in shard.
292-
/// All vectors in shard are available for querying.
293-
/// Calculated as `points_count x vectors_per_point`.
294-
/// Where `vectors_per_point` is a number of named vectors in schema.
295-
pub vectors_count: usize,
296281
/// Approximate number of indexed vectors in the shard.
297282
/// Indexed vectors in large segments are faster to query,
298283
/// as it is stored in vector index (HNSW).

lib/collection/src/shards/local_shard/mod.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,6 @@ impl LocalShard {
10741074

10751075
pub async fn local_shard_info(&self) -> ShardInfoInternal {
10761076
let collection_config = self.collection_config.read().await.clone();
1077-
let mut vectors_count = 0;
10781077
let mut indexed_vectors_count = 0;
10791078
let mut points_count = 0;
10801079
let mut segments_count = 0;
@@ -1087,7 +1086,6 @@ impl LocalShard {
10871086

10881087
let segment_info = segment.get().read().info();
10891088

1090-
vectors_count += segment_info.num_vectors;
10911089
indexed_vectors_count += segment_info.num_indexed_vectors;
10921090
points_count += segment_info.num_points;
10931091
for (key, val) in segment_info.index_schema {
@@ -1104,7 +1102,6 @@ impl LocalShard {
11041102
ShardInfoInternal {
11051103
status,
11061104
optimizer_status,
1107-
vectors_count,
11081105
indexed_vectors_count,
11091106
points_count,
11101107
segments_count,

openapi/openapi-main.ytt.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ info:
6666
{
6767
"result": {
6868
"status": "green",
69-
"vectors_count": 0,
7069
"segments_count": 5,
7170
"disk_data_size": 0,
7271
"ram_data_size": 0,

0 commit comments

Comments
 (0)