Skip to content

Commit 5235d86

Browse files
generall and xzfc authored
measure disk size of directories if available (#7120)
* measure disk size of directories if available
* review suggestions
* Update comment

---------

Co-authored-by: xzfc <[email protected]>
1 parent bf60e96 commit 5235d86

2 files changed

Lines changed: 29 additions & 12 deletions

File tree

lib/collection/src/collection_manager/optimizers/segment_optimizer.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::sync::atomic::AtomicBool;
66

77
use common::budget::{ResourceBudget, ResourcePermit};
88
use common::counter::hardware_counter::HardwareCounterCell;
9-
use common::disk::dir_size;
9+
use common::disk::dir_disk_size;
1010
use io::storage_version::StorageVersion;
1111
use itertools::Itertools;
1212
use parking_lot::lock_api::RwLockWriteGuard;
@@ -117,7 +117,7 @@ pub trait SegmentOptimizer {
117117
let locked_segment = segment.read();
118118

119119
space_occupied =
120-
space_occupied.and_then(|acc| match dir_size(locked_segment.data_path()) {
120+
space_occupied.and_then(|acc| match dir_disk_size(locked_segment.data_path()) {
121121
Ok(size) => Some(size + acc),
122122
Err(err) => {
123123
log::debug!(
@@ -157,10 +157,13 @@ pub trait SegmentOptimizer {
157157

158158
match (space_available, space_needed) {
159159
(Some(space_available), Some(space_needed)) => {
160+
log::debug!(
161+
"Available space: {space_available}, needed for optimization: {space_needed}",
162+
);
160163
if space_available < space_needed {
161-
return Err(CollectionError::service_error(
162-
"Not enough space available for optimization".to_string(),
163-
));
164+
return Err(CollectionError::service_error(format!(
165+
"Not enough space available for optimization, needed: {space_needed}, available: {space_available}"
166+
)));
164167
}
165168
}
166169
_ => {

lib/common/common/src/disk.rs

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,34 @@ use std::path::{Path, PathBuf};
22

33
use walkdir::WalkDir;
44

5-
/// How many bytes a directory takes.
6-
pub fn dir_size(path: impl Into<PathBuf>) -> std::io::Result<u64> {
7-
fn dir_size(mut dir: std::fs::ReadDir) -> std::io::Result<u64> {
5+
/// How many bytes a directory takes on disk.
6+
///
7+
/// Note: on non-unix systems, this function returns the apparent/logical
8+
/// directory size rather than actual disk usage.
9+
pub fn dir_disk_size(path: impl Into<PathBuf>) -> std::io::Result<u64> {
10+
fn dir_disk_size(mut dir: std::fs::ReadDir) -> std::io::Result<u64> {
811
dir.try_fold(0, |acc, file| {
912
let file = file?;
10-
let size = match file.metadata()? {
11-
data if data.is_dir() => dir_size(std::fs::read_dir(file.path())?)?,
12-
data => data.len(),
13+
let metadata = file.metadata()?;
14+
let size = if metadata.is_dir() {
15+
dir_disk_size(std::fs::read_dir(file.path())?)?
16+
} else {
17+
#[cfg(unix)]
18+
{
19+
const BLOCK_SIZE: u64 = 512; // aka DEV_BSIZE
20+
use std::os::unix::fs::MetadataExt;
21+
metadata.blocks() * BLOCK_SIZE
22+
}
23+
#[cfg(not(unix))]
24+
{
25+
metadata.len()
26+
}
1327
};
1428
Ok(acc + size)
1529
})
1630
}
1731

18-
dir_size(std::fs::read_dir(path.into())?)
32+
dir_disk_size(std::fs::read_dir(path.into())?)
1933
}
2034

2135
/// List all files in the given directory recursively.

0 commit comments

Comments
 (0)