Skip to content

Commit 96aab7e

Browse files
committed
Show total size `prek cache gc` removed (#1418)
* Show total size `prek cache gc` removed * Tweak
1 parent b6371ac commit 96aab7e

3 files changed

Lines changed: 181 additions & 111 deletions

File tree

crates/prek/src/cli/cache_gc.rs

Lines changed: 156 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use std::fmt::Write;
2+
use std::fmt::{Display, Formatter};
3+
use std::ops::AddAssign;
24
use std::path::Path;
35

46
use anyhow::Result;
@@ -7,11 +9,91 @@ use rustc_hash::FxHashSet;
79
use tracing::{debug, trace, warn};
810

911
use crate::cli::ExitStatus;
12+
use crate::cli::cache_size::{dir_size_bytes, human_readable_bytes};
1013
use crate::config::{self, Error as ConfigError, Language, Repo as ConfigRepo, load_config};
1114
use crate::hook::{HookEnvKey, HookSpec, Repo as HookRepo};
1215
use crate::printer::Printer;
1316
use crate::store::{CacheBucket, Store, ToolBucket};
1417

18+
/// Category of store content that a garbage-collection sweep can remove.
///
/// One value is passed to each `sweep_dir_by_name` call so the resulting
/// tally knows which label to print.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RemovalKind {
    /// Entries swept from the store's repos directory.
    Repos,
    /// Entries swept from the store's hooks directory.
    HookEnvs,
    /// Entries swept from the store's tools directory.
    Tools,
    /// Entries swept from the store's cache directory.
    CacheEntries,
}
25+
26+
impl RemovalKind {
27+
fn as_str(self) -> &'static str {
28+
match self {
29+
RemovalKind::Repos => "repos",
30+
RemovalKind::HookEnvs => "hook envs",
31+
RemovalKind::Tools => "tools",
32+
RemovalKind::CacheEntries => "cache entries",
33+
}
34+
}
35+
}
36+
37+
#[derive(Debug, Clone)]
38+
struct Removal {
39+
kind: RemovalKind,
40+
count: usize,
41+
bytes: u64,
42+
names: Vec<String>,
43+
}
44+
45+
impl Removal {
46+
fn new(kind: RemovalKind) -> Self {
47+
Self {
48+
kind,
49+
count: 0,
50+
bytes: 0,
51+
names: Vec::new(),
52+
}
53+
}
54+
55+
fn is_empty(&self) -> bool {
56+
self.count == 0
57+
}
58+
}
59+
60+
impl Display for Removal {
61+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
62+
write!(f, "{} {}", self.count.cyan().bold(), self.kind.as_str())
63+
}
64+
}
65+
66+
/// Running total across several per-kind [`Removal`] tallies, used to build
/// the one-line garbage-collection summary.
#[derive(Default, Debug)]
struct RemovalSummary {
    /// One rendered `<count> <label>` fragment per non-empty tally added.
    parts: Vec<String>,
    /// Total number of entries across all tallies added.
    count: usize,
    /// Total size in bytes across all tallies added.
    bytes: u64,
}
72+
73+
impl RemovalSummary {
74+
fn is_empty(&self) -> bool {
75+
self.parts.is_empty()
76+
}
77+
78+
fn joined(&self) -> String {
79+
self.parts.join(", ")
80+
}
81+
82+
fn total_bytes(&self) -> u64 {
83+
self.bytes
84+
}
85+
}
86+
87+
impl AddAssign<&Removal> for RemovalSummary {
    /// Folds one per-kind tally into the summary.
    ///
    /// A rendered `<count> <label>` fragment is recorded only for tallies
    /// that actually counted something, so empty sweeps do not show up in
    /// the joined summary line. The entry count and byte total are always
    /// accumulated, and both saturate instead of overflowing.
    fn add_assign(&mut self, rhs: &Removal) {
        // Use the tally's own emptiness check rather than re-testing the
        // raw counter here.
        if !rhs.is_empty() {
            self.parts.push(rhs.to_string());
        }
        self.count = self.count.saturating_add(rhs.count);
        self.bytes = self.bytes.saturating_add(rhs.bytes);
    }
}
96+
1597
pub(crate) async fn cache_gc(
1698
store: &Store,
1799
dry_run: bool,
@@ -95,99 +177,100 @@ pub(crate) async fn cache_gc(
95177
store.update_tracked_configs(&kept_configs)?;
96178
}
97179

98-
let (removed_repos, removed_repo_names) =
99-
sweep_dir_by_name(&store.repos_dir(), &used_repo_keys, dry_run, verbose).await?;
100-
let (removed_hooks, removed_hook_names) =
101-
sweep_dir_by_name(&store.hooks_dir(), &used_hook_env_dirs, dry_run, verbose).await?;
180+
// Sweep repos/<hash>
181+
let removed_repos = sweep_dir_by_name(
182+
RemovalKind::Repos,
183+
&store.repos_dir(),
184+
&used_repo_keys,
185+
dry_run,
186+
verbose,
187+
)?;
188+
189+
// Sweep hooks/<hash>
190+
let removed_hooks = sweep_dir_by_name(
191+
RemovalKind::HookEnvs,
192+
&store.hooks_dir(),
193+
&used_hook_env_dirs,
194+
dry_run,
195+
verbose,
196+
)?;
102197

103198
// Sweep tools/<bucket>
104199
let tools_root = store.tools_dir();
105200
let used_tool_names: FxHashSet<String> =
106201
used_tools.iter().map(|t| t.as_str().to_string()).collect();
107-
let (removed_tools, removed_tool_names) =
108-
sweep_dir_by_name(&tools_root, &used_tool_names, dry_run, verbose).await?;
202+
let removed_tools = sweep_dir_by_name(
203+
RemovalKind::Tools,
204+
&tools_root,
205+
&used_tool_names,
206+
dry_run,
207+
verbose,
208+
)?;
109209

110210
// Sweep cache/<bucket>
111211
let cache_root = store.cache_dir();
112212
let used_cache_names: FxHashSet<String> =
113213
used_cache.iter().map(|c| c.as_str().to_string()).collect();
114-
let (removed_cache, removed_cache_names) =
115-
sweep_dir_by_name(&cache_root, &used_cache_names, dry_run, verbose).await?;
214+
let removed_cache = sweep_dir_by_name(
215+
RemovalKind::CacheEntries,
216+
&cache_root,
217+
&used_cache_names,
218+
dry_run,
219+
verbose,
220+
)?;
116221

117222
// Always clear scratch, as it is only temporary data.
118223
if !dry_run {
119-
let _ = remove_dir_if_exists(&store.scratch_path()).await?;
224+
let _ = fs_err::remove_dir_all(store.scratch_path());
120225
}
121226
// NOTE: Do not clear `patches/` here. It can contain user-important temporary patches.
122227
// A future enhancement could implement a safer cleanup strategy (e.g. GC patches older
123228
// than a configurable age, or only remove patches known to be orphaned).
124-
// let _ = remove_dir_if_exists(&store.patches_dir()).await?;
229+
// let _ = fs_err::remove_dir_all(store.patches_dir())?;
125230

126-
let mut removed = Vec::new();
127-
if removed_repos > 0 {
128-
removed.push(format!("{} repos", removed_repos.cyan().bold()));
129-
}
130-
if removed_hooks > 0 {
131-
removed.push(format!("{} hook envs", removed_hooks.cyan().bold()));
132-
}
133-
if removed_tools > 0 {
134-
removed.push(format!("{} tools", removed_tools.cyan().bold()));
135-
}
136-
if removed_cache > 0 {
137-
removed.push(format!("{} cache entries", removed_cache.cyan().bold()));
138-
}
231+
let mut removed = RemovalSummary::default();
232+
removed += &removed_repos;
233+
removed += &removed_hooks;
234+
removed += &removed_tools;
235+
removed += &removed_cache;
236+
237+
let removed_total_bytes = removed.total_bytes();
238+
let (removed_bytes, removed_unit) = human_readable_bytes(removed_total_bytes);
139239

140240
let verb = if dry_run { "Would remove" } else { "Removed" };
141241
if removed.is_empty() {
142242
writeln!(printer.stdout(), "{}", "Nothing to clean".bold())?;
143243
} else {
144-
writeln!(printer.stdout(), "{verb} {}", removed.join(", "))?;
244+
writeln!(
245+
printer.stdout(),
246+
"{verb} {} ({}{removed_unit})",
247+
removed.joined(),
248+
format!("{removed_bytes:.1}").cyan().bold(),
249+
)?;
145250

146251
if verbose {
147-
if removed_repos > 0 {
148-
print_removed_details(printer, verb, removed_repos, "repos", removed_repo_names)?;
149-
}
150-
if removed_hooks > 0 {
151-
print_removed_details(
152-
printer,
153-
verb,
154-
removed_hooks,
155-
"hook envs",
156-
removed_hook_names,
157-
)?;
158-
}
159-
if removed_tools > 0 {
160-
print_removed_details(printer, verb, removed_tools, "tools", removed_tool_names)?;
161-
}
162-
if removed_cache > 0 {
163-
print_removed_details(
164-
printer,
165-
verb,
166-
removed_cache,
167-
"cache entries",
168-
removed_cache_names,
169-
)?;
170-
}
252+
print_removed_details(printer, verb, removed_repos)?;
253+
print_removed_details(printer, verb, removed_hooks)?;
254+
print_removed_details(printer, verb, removed_tools)?;
255+
print_removed_details(printer, verb, removed_cache)?;
171256
}
172257
}
173258

174259
Ok(ExitStatus::Success)
175260
}
176261

177-
fn print_removed_details(
178-
printer: Printer,
179-
verb: &'static str,
180-
count: usize,
181-
title: &'static str,
182-
mut names: Vec<String>,
183-
) -> Result<()> {
184-
names.sort_unstable();
262+
fn print_removed_details(printer: Printer, verb: &str, mut removal: Removal) -> Result<()> {
263+
if removal.count == 0 {
264+
return Ok(());
265+
}
266+
267+
removal.names.sort_unstable();
185268
writeln!(
186269
printer.stdout(),
187270
"\n{}:",
188-
format!("{verb} {} {title}", count.cyan()).bold()
271+
format!("{verb} {} {}", removal.count.cyan(), removal.kind.as_str()).bold()
189272
)?;
190-
for name in names {
273+
for name in removal.names {
191274
writeln!(printer.stdout(), "- {name}")?;
192275
}
193276

@@ -286,33 +369,17 @@ fn mark_tools_and_cache_for_language(
286369
}
287370
}
288371

289-
async fn remove_dir_if_exists(path: &Path) -> Result<bool> {
290-
if !path.exists() {
291-
return Ok(false);
292-
}
293-
if path.is_dir() {
294-
fs_err::tokio::remove_dir_all(path).await?;
295-
} else {
296-
fs_err::tokio::remove_file(path).await?;
297-
}
298-
Ok(true)
299-
}
300-
301-
async fn sweep_dir_by_name(
372+
fn sweep_dir_by_name(
373+
kind: RemovalKind,
302374
root: &Path,
303375
keep_names: &FxHashSet<String>,
304376
dry_run: bool,
305377
collect_names: bool,
306-
) -> Result<(usize, Vec<String>)> {
307-
if !root.exists() {
308-
return Ok((0, Vec::new()));
309-
}
310-
311-
let mut removed = 0usize;
312-
let mut removed_names = Vec::new();
378+
) -> Result<Removal> {
379+
let mut removal = Removal::new(kind);
313380
let entries = match fs_err::read_dir(root) {
314381
Ok(entries) => entries,
315-
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok((0, Vec::new())),
382+
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Removal::new(kind)),
316383
Err(err) => return Err(err.into()),
317384
};
318385

@@ -336,24 +403,28 @@ async fn sweep_dir_by_name(
336403
continue;
337404
}
338405

406+
let entry_bytes = dir_size_bytes(&path);
407+
339408
if dry_run {
340-
removed += 1;
409+
removal.count += 1;
410+
removal.bytes = removal.bytes.saturating_add(entry_bytes);
341411
if collect_names {
342-
removed_names.push(name.to_string());
412+
removal.names.push(name.to_string());
343413
}
344414
continue;
345415
}
346416

347417
// Best-effort cleanup.
348-
if let Err(err) = fs_err::tokio::remove_dir_all(&path).await {
418+
if let Err(err) = fs_err::remove_dir_all(&path) {
349419
warn!(%err, path = %path.display(), "Failed to remove unused cache entry");
350420
} else {
351-
removed += 1;
421+
removal.count += 1;
422+
removal.bytes = removal.bytes.saturating_add(entry_bytes);
352423
if collect_names {
353-
removed_names.push(name.to_string());
424+
removal.names.push(name.to_string());
354425
}
355426
}
356427
}
357428

358-
Ok((removed, removed_names))
429+
Ok(removal)
359430
}

crates/prek/src/cli/cache_size.rs

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::fmt::Write;
2+
use std::path::Path;
23

34
use anyhow::Result;
45

@@ -12,26 +13,8 @@ pub(crate) fn cache_size(
1213
human_readable: bool,
1314
printer: Printer,
1415
) -> Result<ExitStatus> {
15-
if !store.path().exists() {
16-
if human_readable {
17-
writeln!(printer.stdout_important(), "0B")?;
18-
} else {
19-
writeln!(printer.stdout_important(), "0")?;
20-
}
21-
return Ok(ExitStatus::Success);
22-
}
23-
2416
// Walk the entire cache root
25-
let total_bytes: u64 = walkdir::WalkDir::new(store.path())
26-
.follow_links(false)
27-
.into_iter()
28-
.filter_map(Result::ok)
29-
.filter_map(|entry| match entry.metadata() {
30-
Ok(metadata) if metadata.is_file() => Some(metadata.len()),
31-
_ => None,
32-
})
33-
.sum();
34-
17+
let total_bytes = dir_size_bytes(store.path());
3518
if human_readable {
3619
let (bytes, unit) = human_readable_bytes(total_bytes);
3720
writeln!(printer.stdout_important(), "{bytes:.1}{unit}")?;
@@ -51,9 +34,25 @@ pub(crate) fn cache_size(
5134
clippy::cast_precision_loss,
5235
clippy::cast_sign_loss
5336
)]
54-
pub fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
37+
/// Scales a byte count to the largest binary unit (powers of 1024) that
/// keeps the value at or above 1, returning the scaled value together with
/// the unit suffix (`"B"` through `"EiB"`).
pub(crate) fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
    const UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
    let last_unit = UNITS.len() - 1;

    let value = bytes as f32;
    // Every 10 bits of magnitude is one step up the unit table. For zero
    // bytes `log2` is negative infinity, which the float-to-integer cast
    // saturates to 0, so the result stays in plain bytes.
    let magnitude = (value.log2() / 10.0) as usize;
    let unit_index = magnitude.min(last_unit);

    let divisor = 1024_f32.powi(unit_index as i32);
    (value / divisor, UNITS[unit_index])
}
43+
44+
pub(crate) fn dir_size_bytes(path: &Path) -> u64 {
45+
if !path.exists() {
46+
return 0;
47+
}
48+
49+
walkdir::WalkDir::new(path)
50+
.follow_links(false)
51+
.into_iter()
52+
.filter_map(Result::ok)
53+
.filter_map(|entry| match entry.metadata() {
54+
Ok(metadata) if metadata.is_file() => Some(metadata.len()),
55+
_ => None,
56+
})
57+
.sum()
58+
}

0 commit comments

Comments
 (0)