Skip to content

Commit 9ad2de8

Browse files
authored
feat: sync PR docs to HF bucket instead of git dataset (#780)
1 parent 3ffa60f commit 9ad2de8

4 files changed

Lines changed: 71 additions & 53 deletions

File tree

.github/workflows/build_main_documentation.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,13 +234,23 @@ jobs:
234234
235235
cd ..
236236
237-
- name: Push to repositories
237+
- name: Push to dataset (legacy, kept for rollback safety)
238238
run: |
239239
source .venv/bin/activate
240240
cd build_dir
241241
doc-builder push ${{ env.package_name }} --doc_build_repo_id "hf-doc-build/doc-build" --token "${{ secrets.hf_token }}" --commit_msg "Updated with commit ${{ inputs.commit_sha }} See: https://github.com/${{ inputs.repo_owner }}/${{ inputs.package }}/commit/${{ inputs.commit_sha }}" --n_retries 5 --upload_version_yml
242242
cd ..
243243
244+
- name: Sync to bucket
245+
env:
246+
HF_TOKEN: ${{ secrets.hf_token }}
247+
PACKAGE_NAME: ${{ env.package_name }}
248+
run: |
249+
pip install -U huggingface_hub
250+
cd build_dir
251+
hf sync "./$PACKAGE_NAME" "hf://buckets/hf-doc-build/doc/$PACKAGE_NAME"
252+
cd ..
253+
244254
if [ -d "notebook_dir" ]
245255
then
246256
cd notebooks

.github/workflows/delete_old_pr_documentations.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ jobs:
1616
- uses: denoland/setup-deno@v1
1717
with:
1818
deno-version: v1.x
19+
- name: Install hf CLI
20+
run: pip install -U huggingface_hub
1921
- run: deno run --allow-env --allow-net --allow-run --allow-read ./delete-old-prs.ts
2022
env:
2123
HF_ACCESS_TOKEN: ${{ secrets.HF_ACCESS_TOKEN }}

.github/workflows/upload_pr_documentation.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,15 @@ jobs:
130130
echo "EOF"
131131
} >> $GITHUB_OUTPUT
132132
133-
- name: Push to repositories
133+
- name: Sync to bucket
134134
shell: bash
135135
env:
136136
PACKAGE_NAME: ${{ inputs.package_name }}
137-
REPO_OWNER: ${{ inputs.repo_owner }}
138-
COMMIT_SHA: ${{ steps.github-context.outputs.commit_sha }}
139137
HF_TOKEN: ${{ secrets.hf_token }}
140138
run: |
141139
cd build_dir
142-
doc-builder push "$PACKAGE_NAME" --doc_build_repo_id "hf-doc-build/doc-build-dev" --token "$HF_TOKEN" --commit_msg "Updated with commit ${COMMIT_SHA} See: https://github.com/${REPO_OWNER}/${PACKAGE_NAME}/commit/${COMMIT_SHA}"
140+
pip install -U huggingface_hub
141+
hf sync "./$PACKAGE_NAME" "hf://buckets/hf-doc-build/doc-dev/$PACKAGE_NAME"
143142
144143
- name: Find doc comment
145144
uses: peter-evans/find-comment@a54c31d7fa095754bfef525c0c8e5e5674c4b4b1 # v2
@@ -162,7 +161,7 @@ jobs:
162161
echo "No authentication method provided"
163162
exit 1
164163
fi
165-
164+
166165
- name: Create comment_bot token
167166
id: comment_bot_token
168167
if: steps.auth.outputs.method == 'github_app'

scripts/delete-old-prs.ts

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,59 @@
11
#!/usr/bin/env -S deno run --allow-env --allow-net --allow-run --allow-read
22
// To format: npx prettier --write .
3-
import { commit, listFiles } from "npm:@huggingface/hub@<version>";
4-
5-
const oneMonthAgo = new Date(Date.now() - 30 * 24 * 3600 * 1000);
6-
7-
const allFiles = listFiles({
8-
repo: { type: "dataset", name: "hf-doc-build/doc-build-dev" },
9-
recursive: true,
10-
expand: true,
11-
});
12-
13-
const filesToDelete: string[] = [];
14-
15-
let fileCount = 0;
16-
let filesWithoutDates = 0;
17-
18-
for await (const file of allFiles) {
19-
fileCount++;
20-
21-
if (file.type !== "file" || !file.path.endsWith(".zip")) {
22-
continue;
23-
}
24-
25-
const date = file.lastCommit?.date;
26-
27-
if (!date) {
28-
filesWithoutDates++;
29-
continue;
30-
}
31-
32-
if (oneMonthAgo < new Date(date)) {
33-
continue;
3+
//
4+
// Cleans up old PR documentation from the HF bucket.
5+
// Lists all pr_* directories across all packages and deletes those older than 30 days.
6+
7+
const BUCKET_ID = "hf-doc-build/doc-dev";
8+
const MAX_AGE_DAYS = 30;
9+
10+
const oneMonthAgo = new Date(Date.now() - MAX_AGE_DAYS * 24 * 3600 * 1000);
11+
const token = Deno.env.get("HF_ACCESS_TOKEN")!;
12+
const headers = { Authorization: `Bearer ${token}` };
13+
14+
// Step 1: List all top-level packages in the bucket
15+
const packagesRes = await fetch(
16+
`https://huggingface.co/api/repos/bucket/${BUCKET_ID}/tree?recursive=false`,
17+
{ headers },
18+
);
19+
const packages: { path: string; type: string }[] = await packagesRes.json();
20+
21+
let totalDeleted = 0;
22+
let totalKept = 0;
23+
24+
for (const pkg of packages) {
25+
if (pkg.type !== "directory") continue;
26+
27+
// Step 2: List pr_* directories inside each package
28+
const entriesRes = await fetch(
29+
`https://huggingface.co/api/repos/bucket/${BUCKET_ID}/tree?path_prefix=${pkg.path}/&recursive=false`,
30+
{ headers },
31+
);
32+
const entries: { path: string; type: string; uploadedAt?: string }[] = await entriesRes.json();
33+
34+
for (const entry of entries) {
35+
if (entry.type !== "directory" || !entry.path.includes("/pr_")) continue;
36+
37+
const uploadedAt = entry.uploadedAt ? new Date(entry.uploadedAt) : null;
38+
if (!uploadedAt) continue;
39+
40+
if (uploadedAt < oneMonthAgo) {
41+
console.log(`Deleting ${entry.path} (uploaded ${uploadedAt.toISOString()})`);
42+
const proc = new Deno.Command("hf", {
43+
args: ["buckets", "rm", `hf-doc-build/doc-dev/${entry.path}`, "--recursive", "-y"],
44+
env: { HF_TOKEN: token },
45+
stdout: "piped",
46+
stderr: "piped",
47+
});
48+
const output = await proc.output();
49+
if (!output.success) {
50+
console.error(`Failed to delete ${entry.path}:`, new TextDecoder().decode(output.stderr));
51+
}
52+
totalDeleted++;
53+
} else {
54+
totalKept++;
55+
}
3456
}
35-
36-
filesToDelete.push(file.path);
3757
}
3858

39-
console.log({fileCount, filesWithoutDates});
40-
41-
if (filesToDelete.length) {
42-
console.log("deleting", filesToDelete.length, "files");
43-
await commit({
44-
repo: { type: "dataset", name: "hf-doc-build/doc-build-dev" },
45-
credentials: { accessToken: Deno.env.get("HF_ACCESS_TOKEN") },
46-
title: "Delete old docs",
47-
operations: filesToDelete.map((file) => ({
48-
operation: "delete",
49-
path: file,
50-
})),
51-
});
52-
}
59+
console.log({ totalDeleted, totalKept });

0 commit comments

Comments
 (0)