|
| 1 | +use std::collections::BTreeSet; |
| 2 | +use std::io::Write; |
| 3 | +use std::path::Path; |
| 4 | + |
| 5 | +use anyhow::Result; |
| 6 | +use clap::Parser; |
| 7 | +use fancy_regex::{Regex, escape}; |
| 8 | +use tokio::io::{AsyncBufReadExt, BufReader}; |
| 9 | + |
| 10 | +use crate::hook::Hook; |
| 11 | +use crate::hooks::run_concurrent_file_checks; |
| 12 | +use crate::run::CONCURRENCY; |
| 13 | + |
| 14 | +#[derive(Parser)] |
| 15 | +#[command(disable_help_subcommand = true)] |
| 16 | +#[command(disable_version_flag = true)] |
| 17 | +#[command(disable_help_flag = true)] |
| 18 | +struct Args { |
| 19 | + #[arg(long = "additional-github-domain")] |
| 20 | + additional_github_domains: Vec<String>, |
| 21 | +} |
| 22 | + |
| 23 | +#[derive(Debug)] |
| 24 | +struct GithubPermalinkMatcher { |
| 25 | + patterns: Vec<Regex>, |
| 26 | +} |
| 27 | + |
| 28 | +impl GithubPermalinkMatcher { |
| 29 | + fn from_hook(hook: &Hook) -> Result<Self> { |
| 30 | + let args = Args::try_parse_from(hook.entry.split()?.iter().chain(&hook.args))?; |
| 31 | + Ok(Self::new(args.additional_github_domains)) |
| 32 | + } |
| 33 | + |
| 34 | + fn new(additional_domains: Vec<String>) -> Self { |
| 35 | + let mut domains = BTreeSet::from([String::from("github.com")]); |
| 36 | + domains.extend(additional_domains); |
| 37 | + |
| 38 | + let patterns = domains |
| 39 | + .into_iter() |
| 40 | + .map(|domain| { |
| 41 | + let domain = escape(&domain); |
| 42 | + let pattern = format!( |
| 43 | + r"https://{domain}/[^/ ]+/[^/ ]+/blob/(?![a-fA-F0-9]{{4,64}}/)([^/. ]+)/[^# ]+#L\d+" |
| 44 | + ); |
| 45 | + Regex::new(&pattern).expect("vcs permalink regex must be valid") |
| 46 | + }) |
| 47 | + .collect(); |
| 48 | + |
| 49 | + Self { patterns } |
| 50 | + } |
| 51 | + |
| 52 | + fn is_non_permalink(&self, line: &[u8]) -> bool { |
| 53 | + let line = String::from_utf8_lossy(line); |
| 54 | + self.patterns |
| 55 | + .iter() |
| 56 | + .any(|pattern| pattern.is_match(&line).unwrap_or(false)) |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +pub(crate) async fn check_vcs_permalinks( |
| 61 | + hook: &Hook, |
| 62 | + filenames: &[&Path], |
| 63 | +) -> Result<(i32, Vec<u8>)> { |
| 64 | + let file_base = hook.project().relative_path(); |
| 65 | + let matcher = GithubPermalinkMatcher::from_hook(hook)?; |
| 66 | + |
| 67 | + run_concurrent_file_checks(filenames.iter().copied(), *CONCURRENCY, |filename| { |
| 68 | + check_file(file_base, filename, &matcher) |
| 69 | + }) |
| 70 | + .await |
| 71 | +} |
| 72 | + |
| 73 | +async fn check_file( |
| 74 | + file_base: &Path, |
| 75 | + filename: &Path, |
| 76 | + matcher: &GithubPermalinkMatcher, |
| 77 | +) -> Result<(i32, Vec<u8>)> { |
| 78 | + let path = file_base.join(filename); |
| 79 | + let file = fs_err::tokio::File::open(&path).await?; |
| 80 | + let mut reader = BufReader::new(file); |
| 81 | + |
| 82 | + let mut retval = 0; |
| 83 | + let mut output = Vec::new(); |
| 84 | + let mut line = Vec::new(); |
| 85 | + let mut line_number = 0; |
| 86 | + |
| 87 | + while reader.read_until(b'\n', &mut line).await? != 0 { |
| 88 | + line_number += 1; |
| 89 | + if matcher.is_non_permalink(&line) { |
| 90 | + retval = 1; |
| 91 | + write!(output, "{}:{}:", filename.display(), line_number)?; |
| 92 | + output.write_all(&line)?; |
| 93 | + if !line.ends_with(b"\n") { |
| 94 | + writeln!(output)?; |
| 95 | + } |
| 96 | + } |
| 97 | + line.clear(); |
| 98 | + } |
| 99 | + |
| 100 | + if retval != 0 { |
| 101 | + writeln!(output)?; |
| 102 | + writeln!(output, "Non-permanent github link detected.")?; |
| 103 | + writeln!( |
| 104 | + output, |
| 105 | + "On any page on github press [y] to load a permalink." |
| 106 | + )?; |
| 107 | + } |
| 108 | + |
| 109 | + Ok((retval, output)) |
| 110 | +} |
| 111 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Builds a matcher for `github.com` plus the given extra domains.
    fn matcher(domains: &[&str]) -> GithubPermalinkMatcher {
        let owned: Vec<String> = domains.iter().map(|d| (*d).to_string()).collect();
        GithubPermalinkMatcher::new(owned)
    }

    /// Links pinned to a commit hash (short or full) are accepted.
    #[test]
    fn test_permalink_not_flagged() {
        let m = matcher(&[]);
        let permalinks: [&[u8]; 2] = [
            b"https://github.com/owner/repo/blob/abc123def456/file.py#L10",
            b"https://github.com/owner/repo/blob/abcdef1234567890abcdef1234567890abcdef12/src/main.rs#L42",
        ];
        for link in permalinks {
            assert!(!m.is_non_permalink(link));
        }
    }

    /// Links through a branch name are reported.
    #[test]
    fn test_branch_link_flagged() {
        let m = matcher(&[]);
        let branch_links: [&[u8]; 3] = [
            b"https://github.com/owner/repo/blob/main/file.py#L10",
            b"https://github.com/owner/repo/blob/master/src/lib.rs#L5",
            b"https://github.com/owner/repo/blob/develop/README.md#L1",
        ];
        for link in branch_links {
            assert!(m.is_non_permalink(link));
        }
    }

    /// Without a `#L<n>` anchor a branch link is not reported.
    #[test]
    fn test_no_line_number_not_flagged() {
        let m = matcher(&[]);
        let link: &[u8] = b"https://github.com/owner/repo/blob/main/file.py";
        assert!(!m.is_non_permalink(link));
    }

    /// Extra domains are checked exactly like `github.com`.
    #[test]
    fn test_additional_github_domain_flagged() {
        let m = matcher(&["github.example.com"]);
        let link: &[u8] = b"https://github.example.com/owner/repo/blob/main/file.py#L10";
        assert!(m.is_non_permalink(link));
    }

    /// Repeated domains (including the built-in one) yield a single pattern each.
    #[test]
    fn test_github_domains_are_deduplicated() {
        let m = matcher(&["github.example.com", "github.com", "github.example.com"]);
        assert_eq!(m.patterns.len(), 2);
    }

    /// End-to-end check of the report format for a single offending file.
    #[tokio::test]
    async fn test_check_file_with_additional_domain() -> Result<()> {
        let dir = tempdir()?;
        let relative = PathBuf::from("links.md");
        fs_err::tokio::write(
            dir.path().join(&relative),
            b"https://github.example.com/owner/repo/blob/main/file.py#L10\n",
        )
        .await?;

        let m = matcher(&["github.example.com"]);
        let (code, output) = check_file(dir.path(), &relative, &m).await?;

        let expected = concat!(
            "links.md:1:https://github.example.com/owner/repo/blob/main/file.py#L10\n",
            "\n",
            "Non-permanent github link detected.\n",
            "On any page on github press [y] to load a permalink.\n",
        );
        assert_eq!(code, 1);
        assert_eq!(String::from_utf8(output)?, expected);

        Ok(())
    }
}
0 commit comments