|
1 | 1 | #!/usr/bin/env -S deno run --allow-env --allow-net --allow-run --allow-read |
2 | 2 | // To format: npx prettier --write . |
3 | | -import { commit, listFiles } from "npm:@huggingface/[email protected]"; |
4 | | - |
5 | | -const oneMonthAgo = new Date(Date.now() - 30 * 24 * 3600 * 1000); |
6 | | - |
7 | | -const allFiles = listFiles({ |
8 | | - repo: { type: "dataset", name: "hf-doc-build/doc-build-dev" }, |
9 | | - recursive: true, |
10 | | - expand: true, |
11 | | -}); |
12 | | - |
13 | | -const filesToDelete: string[] = []; |
14 | | - |
15 | | -let fileCount = 0; |
16 | | -let filesWithoutDates = 0; |
17 | | - |
18 | | -for await (const file of allFiles) { |
19 | | - fileCount++; |
20 | | - |
21 | | - if (file.type !== "file" || !file.path.endsWith(".zip")) { |
22 | | - continue; |
23 | | - } |
24 | | - |
25 | | - const date = file.lastCommit?.date; |
26 | | - |
27 | | - if (!date) { |
28 | | - filesWithoutDates++; |
29 | | - continue; |
30 | | - } |
31 | | - |
32 | | - if (oneMonthAgo < new Date(date)) { |
33 | | - continue; |
| 3 | +// |
| 4 | +// Cleans up old PR documentation from the HF bucket. |
| 5 | +// Lists all pr_* directories across all packages and deletes those older than 30 days. |
| 6 | + |
| 7 | +const BUCKET_ID = "hf-doc-build/doc-dev"; |
| 8 | +const MAX_AGE_DAYS = 30; |
| 9 | + |
| 10 | +const oneMonthAgo = new Date(Date.now() - MAX_AGE_DAYS * 24 * 3600 * 1000); |
| 11 | +const token = Deno.env.get("HF_ACCESS_TOKEN")!; |
| 12 | +const headers = { Authorization: `Bearer ${token}` }; |
| 13 | + |
| 14 | +// Step 1: List all top-level packages in the bucket |
| 15 | +const packagesRes = await fetch( |
| 16 | + `https://huggingface.co/api/repos/bucket/${BUCKET_ID}/tree?recursive=false`, |
| 17 | + { headers }, |
| 18 | +); |
| 19 | +const packages: { path: string; type: string }[] = await packagesRes.json(); |
| 20 | + |
| 21 | +let totalDeleted = 0; |
| 22 | +let totalKept = 0; |
| 23 | + |
| 24 | +for (const pkg of packages) { |
| 25 | + if (pkg.type !== "directory") continue; |
| 26 | + |
| 27 | + // Step 2: List pr_* directories inside each package |
| 28 | + const entriesRes = await fetch( |
| 29 | + `https://huggingface.co/api/repos/bucket/${BUCKET_ID}/tree?path_prefix=${pkg.path}/&recursive=false`, |
| 30 | + { headers }, |
| 31 | + ); |
| 32 | + const entries: { path: string; type: string; uploadedAt?: string }[] = await entriesRes.json(); |
| 33 | + |
| 34 | + for (const entry of entries) { |
| 35 | + if (entry.type !== "directory" || !entry.path.includes("/pr_")) continue; |
| 36 | + |
| 37 | + const uploadedAt = entry.uploadedAt ? new Date(entry.uploadedAt) : null; |
| 38 | + if (!uploadedAt) continue; |
| 39 | + |
| 40 | + if (uploadedAt < oneMonthAgo) { |
| 41 | + console.log(`Deleting ${entry.path} (uploaded ${uploadedAt.toISOString()})`); |
| 42 | + const proc = new Deno.Command("hf", { |
| 43 | + args: ["buckets", "rm", `hf-doc-build/doc-dev/${entry.path}`, "--recursive", "-y"], |
| 44 | + env: { HF_TOKEN: token }, |
| 45 | + stdout: "piped", |
| 46 | + stderr: "piped", |
| 47 | + }); |
| 48 | + const output = await proc.output(); |
| 49 | + if (!output.success) { |
| 50 | + console.error(`Failed to delete ${entry.path}:`, new TextDecoder().decode(output.stderr)); |
| 51 | + } |
| 52 | + totalDeleted++; |
| 53 | + } else { |
| 54 | + totalKept++; |
| 55 | + } |
34 | 56 | } |
35 | | - |
36 | | - filesToDelete.push(file.path); |
37 | 57 | } |
38 | 58 |
|
39 | | -console.log({fileCount, filesWithoutDates}); |
40 | | - |
41 | | -if (filesToDelete.length) { |
42 | | - console.log("deleting", filesToDelete.length, "files"); |
43 | | - await commit({ |
44 | | - repo: { type: "dataset", name: "hf-doc-build/doc-build-dev" }, |
45 | | - credentials: { accessToken: Deno.env.get("HF_ACCESS_TOKEN") }, |
46 | | - title: "Delete old docs", |
47 | | - operations: filesToDelete.map((file) => ({ |
48 | | - operation: "delete", |
49 | | - path: file, |
50 | | - })), |
51 | | - }); |
52 | | -} |
| 59 | +console.log({ totalDeleted, totalKept }); |
0 commit comments