Skip to content

Commit cb3f71c

Browse files
mvanhornclaudej178
authored
Add check-vcs-permalinks as builtin hook (#1842)
## Summary Add a native Rust fast path for the `check-vcs-permalinks` hook from `pre-commit/pre-commit-hooks`. ## Why this matters Without a fast path, prek must clone the `pre-commit/pre-commit-hooks` repo, install Python, and run the hook script. The native implementation avoids that overhead for a simple regex-based check. ## Changes - New file: `crates/prek/src/hooks/pre_commit_hooks/check_vcs_permalinks.rs` - Port of the [upstream Python implementation](https://github.com/pre-commit/pre-commit-hooks/blob/main/pre_commit_hooks/check_vcs_permalinks.py) - Detects GitHub blob URLs that use a branch name (e.g. `main`, `master`) instead of a commit hash, combined with a `#L` line number anchor - Scans files concurrently using the existing `run_concurrent_file_checks` helper - Reports file path, line number, and offending line content - Includes unit tests for permalinks, branch links, and edge cases - Updated `crates/prek/src/hooks/pre_commit_hooks/mod.rs`: added `CheckVcsPermalinks` variant ## Testing - `cargo check -p prek` passes - Unit tests verify both positive (branch link) and negative (commit hash permalink) cases Fixes #1833 This contribution was developed with AI assistance (Claude Code). --------- Co-authored-by: Matt Van Horn <[email protected]> Co-authored-by: Claude Opus 4.6 <[email protected]> Co-authored-by: Jo <[email protected]>
1 parent 307254d commit cb3f71c

7 files changed

Lines changed: 287 additions & 0 deletions

File tree

crates/prek/src/hooks/builtin_hooks/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pub(crate) enum BuiltinHooks {
3737
CheckMergeConflict,
3838
CheckSymlinks,
3939
CheckToml,
40+
CheckVcsPermalinks,
4041
CheckXml,
4142
CheckYaml,
4243
DetectPrivateKey,
@@ -74,6 +75,9 @@ impl BuiltinHooks {
7475
}
7576
Self::CheckSymlinks => pre_commit_hooks::check_symlinks(hook, filenames).await,
7677
Self::CheckToml => pre_commit_hooks::check_toml(hook, filenames).await,
78+
Self::CheckVcsPermalinks => {
79+
pre_commit_hooks::check_vcs_permalinks(hook, filenames).await
80+
}
7781
Self::CheckXml => pre_commit_hooks::check_xml(hook, filenames).await,
7882
Self::CheckYaml => pre_commit_hooks::check_yaml(hook, filenames).await,
7983
Self::DetectPrivateKey => pre_commit_hooks::detect_private_key(hook, filenames).await,
@@ -211,6 +215,19 @@ impl BuiltinHook {
211215
..Default::default()
212216
},
213217
},
218+
BuiltinHooks::CheckVcsPermalinks => BuiltinHook {
219+
id: "check-vcs-permalinks".to_string(),
220+
name: "check vcs permalinks".to_string(),
221+
entry: "check-vcs-permalinks".to_string(),
222+
priority: None,
223+
options: HookOptions {
224+
description: Some(
225+
"ensures that links to vcs websites are permalinks.".to_string(),
226+
),
227+
types: Some(tags::TAG_SET_TEXT),
228+
..Default::default()
229+
},
230+
},
214231
BuiltinHooks::CheckXml => BuiltinHook {
215232
id: "check-xml".to_string(),
216233
name: "check xml".to_string(),
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
use std::collections::BTreeSet;
2+
use std::io::Write;
3+
use std::path::Path;
4+
5+
use anyhow::Result;
6+
use clap::Parser;
7+
use fancy_regex::{Regex, escape};
8+
use tokio::io::{AsyncBufReadExt, BufReader};
9+
10+
use crate::hook::Hook;
11+
use crate::hooks::run_concurrent_file_checks;
12+
use crate::run::CONCURRENCY;
13+
14+
#[derive(Parser)]
15+
#[command(disable_help_subcommand = true)]
16+
#[command(disable_version_flag = true)]
17+
#[command(disable_help_flag = true)]
18+
struct Args {
19+
#[arg(long = "additional-github-domain")]
20+
additional_github_domains: Vec<String>,
21+
}
22+
23+
#[derive(Debug)]
24+
struct GithubPermalinkMatcher {
25+
patterns: Vec<Regex>,
26+
}
27+
28+
impl GithubPermalinkMatcher {
29+
fn from_hook(hook: &Hook) -> Result<Self> {
30+
let args = Args::try_parse_from(hook.entry.split()?.iter().chain(&hook.args))?;
31+
Ok(Self::new(args.additional_github_domains))
32+
}
33+
34+
fn new(additional_domains: Vec<String>) -> Self {
35+
let mut domains = BTreeSet::from([String::from("github.com")]);
36+
domains.extend(additional_domains);
37+
38+
let patterns = domains
39+
.into_iter()
40+
.map(|domain| {
41+
let domain = escape(&domain);
42+
let pattern = format!(
43+
r"https://{domain}/[^/ ]+/[^/ ]+/blob/(?![a-fA-F0-9]{{4,64}}/)([^/. ]+)/[^# ]+#L\d+"
44+
);
45+
Regex::new(&pattern).expect("vcs permalink regex must be valid")
46+
})
47+
.collect();
48+
49+
Self { patterns }
50+
}
51+
52+
fn is_non_permalink(&self, line: &[u8]) -> bool {
53+
let line = String::from_utf8_lossy(line);
54+
self.patterns
55+
.iter()
56+
.any(|pattern| pattern.is_match(&line).unwrap_or(false))
57+
}
58+
}
59+
60+
pub(crate) async fn check_vcs_permalinks(
61+
hook: &Hook,
62+
filenames: &[&Path],
63+
) -> Result<(i32, Vec<u8>)> {
64+
let file_base = hook.project().relative_path();
65+
let matcher = GithubPermalinkMatcher::from_hook(hook)?;
66+
67+
run_concurrent_file_checks(filenames.iter().copied(), *CONCURRENCY, |filename| {
68+
check_file(file_base, filename, &matcher)
69+
})
70+
.await
71+
}
72+
73+
async fn check_file(
74+
file_base: &Path,
75+
filename: &Path,
76+
matcher: &GithubPermalinkMatcher,
77+
) -> Result<(i32, Vec<u8>)> {
78+
let path = file_base.join(filename);
79+
let file = fs_err::tokio::File::open(&path).await?;
80+
let mut reader = BufReader::new(file);
81+
82+
let mut retval = 0;
83+
let mut output = Vec::new();
84+
let mut line = Vec::new();
85+
let mut line_number = 0;
86+
87+
while reader.read_until(b'\n', &mut line).await? != 0 {
88+
line_number += 1;
89+
if matcher.is_non_permalink(&line) {
90+
retval = 1;
91+
write!(output, "{}:{}:", filename.display(), line_number)?;
92+
output.write_all(&line)?;
93+
if !line.ends_with(b"\n") {
94+
writeln!(output)?;
95+
}
96+
}
97+
line.clear();
98+
}
99+
100+
if retval != 0 {
101+
writeln!(output)?;
102+
writeln!(output, "Non-permanent github link detected.")?;
103+
writeln!(
104+
output,
105+
"On any page on github press [y] to load a permalink."
106+
)?;
107+
}
108+
109+
Ok((retval, output))
110+
}
111+
112+
#[cfg(test)]
113+
mod tests {
114+
use super::*;
115+
use std::path::PathBuf;
116+
use tempfile::tempdir;
117+
118+
fn matcher(domains: &[&str]) -> GithubPermalinkMatcher {
119+
GithubPermalinkMatcher::new(domains.iter().map(ToString::to_string).collect())
120+
}
121+
122+
#[test]
123+
fn test_permalink_not_flagged() {
124+
let matcher = matcher(&[]);
125+
assert!(
126+
!matcher
127+
.is_non_permalink(b"https://github.com/owner/repo/blob/abc123def456/file.py#L10")
128+
);
129+
assert!(!matcher.is_non_permalink(
130+
b"https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/src/main.rs#L42",
131+
));
132+
}
133+
134+
#[test]
135+
fn test_branch_link_flagged() {
136+
let matcher = matcher(&[]);
137+
assert!(matcher.is_non_permalink(b"https://github.com/owner/repo/blob/main/file.py#L10"));
138+
assert!(
139+
matcher.is_non_permalink(b"https://github.com/owner/repo/blob/master/src/lib.rs#L5")
140+
);
141+
assert!(
142+
matcher.is_non_permalink(b"https://github.com/owner/repo/blob/develop/README.md#L1")
143+
);
144+
}
145+
146+
#[test]
147+
fn test_no_line_number_not_flagged() {
148+
let matcher = matcher(&[]);
149+
assert!(!matcher.is_non_permalink(b"https://github.com/owner/repo/blob/main/file.py"));
150+
}
151+
152+
#[test]
153+
fn test_additional_github_domain_flagged() {
154+
let matcher = matcher(&["github.example.com"]);
155+
assert!(
156+
matcher
157+
.is_non_permalink(b"https://github.example.com/owner/repo/blob/main/file.py#L10",)
158+
);
159+
}
160+
161+
#[test]
162+
fn test_github_domains_are_deduplicated() {
163+
let matcher = GithubPermalinkMatcher::new(vec![
164+
"github.example.com".to_string(),
165+
"github.com".to_string(),
166+
"github.example.com".to_string(),
167+
]);
168+
assert_eq!(matcher.patterns.len(), 2);
169+
}
170+
171+
#[tokio::test]
172+
async fn test_check_file_with_additional_domain() -> Result<()> {
173+
let dir = tempdir()?;
174+
let file_path = dir.path().join("links.md");
175+
fs_err::tokio::write(
176+
&file_path,
177+
b"https://github.example.com/owner/repo/blob/main/file.py#L10\n",
178+
)
179+
.await?;
180+
181+
let matcher = matcher(&["github.example.com"]);
182+
let relative = PathBuf::from("links.md");
183+
let (code, output) = check_file(dir.path(), &relative, &matcher).await?;
184+
185+
assert_eq!(code, 1);
186+
assert_eq!(
187+
String::from_utf8(output)?,
188+
"links.md:1:https://github.example.com/owner/repo/blob/main/file.py#L10\n\nNon-permanent github link detected.\nOn any page on github press [y] to load a permalink.\n",
189+
);
190+
191+
Ok(())
192+
}
193+
}

crates/prek/src/hooks/pre_commit_hooks/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pub(crate) mod check_json;
1212
mod check_merge_conflict;
1313
mod check_symlinks;
1414
mod check_toml;
15+
mod check_vcs_permalinks;
1516
mod check_xml;
1617
mod check_yaml;
1718
mod detect_private_key;
@@ -28,6 +29,7 @@ pub(crate) use check_json::check_json;
2829
pub(crate) use check_merge_conflict::check_merge_conflict;
2930
pub(crate) use check_symlinks::check_symlinks;
3031
pub(crate) use check_toml::check_toml;
32+
pub(crate) use check_vcs_permalinks::check_vcs_permalinks;
3133
pub(crate) use check_xml::check_xml;
3234
pub(crate) use check_yaml::check_yaml;
3335
pub(crate) use detect_private_key::detect_private_key;
@@ -44,6 +46,7 @@ pub(crate) enum PreCommitHooks {
4446
CheckAddedLargeFiles,
4547
CheckCaseConflict,
4648
CheckExecutablesHaveShebangs,
49+
CheckVcsPermalinks,
4750
EndOfFileFixer,
4851
FixByteOrderMarker,
4952
CheckJson,
@@ -75,6 +78,7 @@ impl PreCommitHooks {
7578
Self::CheckExecutablesHaveShebangs => {
7679
check_executables_have_shebangs(hook, filenames).await
7780
}
81+
Self::CheckVcsPermalinks => check_vcs_permalinks(hook, filenames).await,
7882
Self::EndOfFileFixer => fix_end_of_file(hook, filenames).await,
7983
Self::FixByteOrderMarker => fix_byte_order_marker(hook, filenames).await,
8084
Self::CheckJson => check_json(hook, filenames).await,

crates/prek/tests/builtin_hooks.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,50 @@ fn check_yaml_multiple_document() -> Result<()> {
266266
Ok(())
267267
}
268268

269+
#[test]
270+
fn check_vcs_permalinks_builtin() -> Result<()> {
271+
let context = TestContext::new();
272+
context.init_project();
273+
274+
context.write_pre_commit_config(indoc::indoc! {r"
275+
repos:
276+
- repo: builtin
277+
hooks:
278+
- id: check-vcs-permalinks
279+
args: [--additional-github-domain=github.example.com]
280+
"});
281+
282+
context
283+
.work_dir()
284+
.child("links.md")
285+
.write_str(indoc::indoc! {r"
286+
https://github.com/owner/repo/blob/main/file.py#L10
287+
https://github.example.com/owner/repo/blob/master/src/lib.rs#L5
288+
https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/file.py#L10
289+
"})?;
290+
291+
context.git_add(".");
292+
293+
cmd_snapshot!(context.filters(), context.run(), @r"
294+
success: false
295+
exit_code: 1
296+
----- stdout -----
297+
check vcs permalinks.....................................................Failed
298+
- hook id: check-vcs-permalinks
299+
- exit code: 1
300+
301+
links.md:1:https://github.com/owner/repo/blob/main/file.py#L10
302+
links.md:2:https://github.example.com/owner/repo/blob/master/src/lib.rs#L5
303+
304+
Non-permanent github link detected.
305+
On any page on github press [y] to load a permalink.
306+
307+
----- stderr -----
308+
");
309+
310+
Ok(())
311+
}
312+
269313
#[test]
270314
fn check_json_hook() -> Result<()> {
271315
let context = TestContext::new();

crates/prek/tests/list_builtins.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fn list_builtins_basic() {
1919
check-merge-conflict
2020
check-symlinks
2121
check-toml
22+
check-vcs-permalinks
2223
check-xml
2324
check-yaml
2425
detect-private-key
@@ -67,6 +68,9 @@ fn list_builtins_verbose() {
6768
check-toml
6869
checks toml files for parseable syntax.
6970
71+
check-vcs-permalinks
72+
ensures that links to vcs websites are permalinks.
73+
7074
check-xml
7175
checks xml files for parseable syntax.
7276
@@ -149,6 +153,11 @@ fn list_builtins_json() {
149153
"name": "check toml",
150154
"description": "checks toml files for parseable syntax."
151155
},
156+
{
157+
"id": "check-vcs-permalinks",
158+
"name": "check vcs permalinks",
159+
"description": "ensures that links to vcs websites are permalinks."
160+
},
152161
{
153162
"id": "check-xml",
154163
"name": "check xml",

docs/builtin.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Currently, only part of hooks from `https://github.com/pre-commit/pre-commit-hoo
4141
- [`fix-byte-order-marker`](https://github.com/pre-commit/pre-commit-hooks#fix-byte-order-marker) (Remove UTF-8 byte order marker)
4242
- [`check-json`](https://github.com/pre-commit/pre-commit-hooks#check-json) (Validate JSON files)
4343
- [`check-toml`](https://github.com/pre-commit/pre-commit-hooks#check-toml) (Validate TOML files)
44+
- [`check-vcs-permalinks`](https://github.com/pre-commit/pre-commit-hooks#check-vcs-permalinks) (Check that VCS links are permalinks)
4445
- [`check-yaml`](https://github.com/pre-commit/pre-commit-hooks#check-yaml) (Validate YAML files)
4546
- [`check-xml`](https://github.com/pre-commit/pre-commit-hooks#check-xml) (Validate XML files)
4647
- [`mixed-line-ending`](https://github.com/pre-commit/pre-commit-hooks#mixed-line-ending) (Normalize or check line endings)
@@ -98,6 +99,7 @@ For `repo: builtin`, the following hooks are supported:
9899
- [`check-json`](#check-json) (Validate JSON files)
99100
- [`check-json5`](#check-json5) (Validate JSON5 files)
100101
- [`check-toml`](#check-toml) (Validate TOML files)
102+
- [`check-vcs-permalinks`](#check-vcs-permalinks) (Check that VCS links are permalinks)
101103
- [`check-yaml`](#check-yaml) (Validate YAML files)
102104
- [`check-xml`](#check-xml) (Validate XML files)
103105
- [`mixed-line-ending`](#mixed-line-ending) (Normalize or check line endings)
@@ -273,6 +275,23 @@ Attempts to load all TOML files to verify syntax.
273275

274276
---
275277

278+
#### `check-vcs-permalinks`
279+
280+
Ensures that links to VCS websites are permalinks.
281+
282+
**Supported arguments** (compatible with `pre-commit-hooks`):
283+
284+
- `--additional-github-domain=<domain>` (repeatable)
285+
- Adds extra GitHub-style domains to check in addition to the default `github.com`.
286+
287+
**Behavior / caveats**
288+
289+
- Flags links of the form `https://<domain>/<owner>/<repo>/blob/<branch>/...#L...`.
290+
- Does not flag commit-hash permalinks where `<branch>` is already a 4-64 character hexadecimal revision.
291+
- The builtin and fast-path implementations currently follow the upstream hook's GitHub-family matching behavior.
292+
293+
---
294+
276295
#### `check-yaml`
277296

278297
Attempts to load all YAML files to verify syntax.

prek.schema.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)