Skip to content

Commit addddb8

Browse files
authored
Add file-contents-sorter as builtin hook (#1846)
Closes #1843
1 parent cb3f71c commit addddb8

8 files changed

Lines changed: 307 additions & 0 deletions

File tree

crates/prek/src/config.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ enum FilePatternWireError {
174174
#[derive(Debug, Clone, Deserialize)]
#[serde(try_from = "FilePatternWire")]
pub(crate) enum FilePattern {
    /// Matches nothing: `is_match` returns `false` for every input.
    Never,
    /// Matches via a regular expression; a regex evaluation error is treated
    /// as "no match" by `is_match`.
    Regex(Regex),
    /// Matches via a set of glob patterns.
    Glob(GlobPatterns),
}
@@ -189,6 +190,7 @@ impl FilePattern {
189190

190191
pub(crate) fn is_match(&self, str: &str) -> bool {
191192
match self {
193+
FilePattern::Never => false,
192194
FilePattern::Regex(regex) => regex.is_match(str).unwrap_or(false),
193195
FilePattern::Glob(globs) => globs.is_match(str),
194196
}
@@ -198,6 +200,7 @@ impl FilePattern {
198200
impl Display for FilePattern {
199201
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200202
match self {
203+
FilePattern::Never => f.write_str("never"),
201204
FilePattern::Regex(regex) => write!(f, "regex: {}", regex.as_str()),
202205
FilePattern::Glob(globs) => {
203206
let patterns = globs.patterns.iter().join(", ");
@@ -1464,6 +1467,14 @@ mod tests {
14641467
assert!(!pattern.is_match("tests/main.rs"));
14651468
}
14661469

1470+
#[test]
fn file_pattern_never_matches() {
    // `Never` has to reject every candidate, the empty string included.
    let never = FilePattern::Never;
    for candidate in ["", "foo.txt", "nested/path.rs"] {
        assert!(!never.is_match(candidate));
    }
}
1477+
14671478
#[test]
14681479
fn empty_glob_list_matches_nothing() {
14691480
let pattern = serde_saphyr::from_str::<FilePattern>("glob: []").unwrap();

crates/prek/src/hooks/builtin_hooks/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ pub(crate) enum BuiltinHooks {
4242
CheckYaml,
4343
DetectPrivateKey,
4444
EndOfFileFixer,
45+
FileContentsSorter,
4546
FixByteOrderMarker,
4647
MixedLineEnding,
4748
NoCommitToBranch,
@@ -82,6 +83,9 @@ impl BuiltinHooks {
8283
Self::CheckYaml => pre_commit_hooks::check_yaml(hook, filenames).await,
8384
Self::DetectPrivateKey => pre_commit_hooks::detect_private_key(hook, filenames).await,
8485
Self::EndOfFileFixer => pre_commit_hooks::fix_end_of_file(hook, filenames).await,
86+
Self::FileContentsSorter => {
87+
pre_commit_hooks::file_contents_sorter(hook, filenames).await
88+
}
8589
Self::FixByteOrderMarker => {
8690
pre_commit_hooks::fix_byte_order_marker(hook, filenames).await
8791
}
@@ -276,6 +280,20 @@ impl BuiltinHook {
276280
..Default::default()
277281
},
278282
},
283+
BuiltinHooks::FileContentsSorter => BuiltinHook {
284+
id: "file-contents-sorter".to_string(),
285+
name: "file contents sorter".to_string(),
286+
entry: "file-contents-sorter".to_string(),
287+
priority: None,
288+
options: HookOptions {
289+
description: Some(
290+
"sorts the lines in specified files (defaults to alphabetical)."
291+
.to_string(),
292+
),
293+
files: Some(FilePattern::Never),
294+
..Default::default()
295+
},
296+
},
279297
BuiltinHooks::FixByteOrderMarker => BuiltinHook {
280298
id: "fix-byte-order-marker".to_string(),
281299
name: "fix utf-8 byte order marker".to_string(),
crates/prek/src/hooks/pre_commit_hooks/file_contents_sorter.rs

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
use std::path::Path;
2+
3+
use anyhow::Result;
4+
use bstr::ByteSlice;
5+
use clap::Parser;
6+
7+
use crate::hook::Hook;
8+
use crate::hooks::run_concurrent_file_checks;
9+
use crate::run::CONCURRENCY;
10+
11+
#[derive(Parser)]
12+
#[command(disable_help_subcommand = true)]
13+
#[command(disable_version_flag = true)]
14+
#[command(disable_help_flag = true)]
15+
struct Args {
16+
#[arg(long, conflicts_with = "unique")]
17+
ignore_case: bool,
18+
#[arg(long, conflicts_with = "ignore_case")]
19+
unique: bool,
20+
}
21+
22+
pub(crate) async fn file_contents_sorter(
23+
hook: &Hook,
24+
filenames: &[&Path],
25+
) -> Result<(i32, Vec<u8>)> {
26+
let args = Args::try_parse_from(hook.entry.split()?.iter().chain(&hook.args))?;
27+
let file_base = hook.project().relative_path();
28+
29+
run_concurrent_file_checks(filenames.iter().copied(), *CONCURRENCY, |filename| {
30+
sort_file(file_base, filename, args.ignore_case, args.unique)
31+
})
32+
.await
33+
}
34+
35+
/// Sorts a single file in place.
///
/// Reads `file_base.join(filename)`, computes the sorted contents, and only
/// writes the file back when the bytes actually differ.
///
/// Returns `(0, empty)` when the file was already in its sorted form, or
/// `(1, b"Sorting <filename>\n")` after rewriting it.
///
/// # Errors
///
/// Propagates any error from reading or writing the file.
async fn sort_file(
    file_base: &Path,
    filename: &Path,
    ignore_case: bool,
    unique: bool,
) -> Result<(i32, Vec<u8>)> {
    let target = file_base.join(filename);
    let original = fs_err::tokio::read(&target).await?;
    let sorted = sorted_contents(&original, ignore_case, unique);

    if sorted != original {
        fs_err::tokio::write(&target, &sorted).await?;
        let message = format!("Sorting {}\n", filename.display());
        return Ok((1, message.into_bytes()));
    }

    Ok((0, Vec::new()))
}
52+
53+
fn sorted_contents(before: &[u8], ignore_case: bool, unique: bool) -> Vec<u8> {
54+
let mut lines = before
55+
.split_inclusive(|&byte| byte == b'\n')
56+
.filter_map(normalize_line)
57+
.collect::<Vec<_>>();
58+
59+
if ignore_case {
60+
lines.sort_by(|left, right| cmp_ignore_ascii_case(left, right));
61+
} else {
62+
lines.sort_unstable();
63+
if unique {
64+
lines.dedup();
65+
}
66+
}
67+
68+
if lines.is_empty() {
69+
return Vec::new();
70+
}
71+
72+
let mut after =
73+
Vec::with_capacity(lines.iter().map(|line| line.len()).sum::<usize>() + lines.len());
74+
for line in lines {
75+
after.extend_from_slice(line);
76+
after.push(b'\n');
77+
}
78+
after
79+
}
80+
81+
fn normalize_line(mut line: &[u8]) -> Option<&[u8]> {
82+
line = line.trim_end_with(|byte| matches!(byte, '\n' | '\r'));
83+
84+
// Drop empty and whitespace-only lines.
85+
if line.trim_ascii().is_empty() {
86+
None
87+
} else {
88+
Some(line)
89+
}
90+
}
91+
92+
/// Lexicographically compares two byte strings after ASCII-lowercasing each
/// byte. Non-ASCII bytes are compared as they are.
fn cmp_ignore_ascii_case(left: &[u8], right: &[u8]) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    left.iter()
        .zip(right)
        .map(|(a, b)| a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()))
        .find(|ordering| *ordering != Ordering::Equal)
        // Common prefix is equal: the shorter input sorts first.
        .unwrap_or_else(|| left.len().cmp(&right.len()))
}
97+
98+
#[cfg(test)]
mod tests {
    use super::*;

    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Writes `content` to `name` inside `dir` and returns the full path.
    async fn create_test_file(
        dir: &tempfile::TempDir,
        name: &str,
        content: &[u8],
    ) -> Result<PathBuf> {
        let target = dir.path().join(name);
        fs_err::tokio::write(&target, content).await?;
        Ok(target)
    }

    /// Blank and whitespace-only lines vanish; CRLF endings become LF.
    #[test]
    fn test_sorted_contents_sorts_and_drops_blank_lines() {
        let sorted = sorted_contents(b"beta\n\n \nalpha\r\n", false, false);
        assert_eq!(sorted, b"alpha\nbeta\n");
    }

    /// With `ignore_case`, ordering disregards ASCII case.
    #[test]
    fn test_sorted_contents_ignore_case() {
        let sorted = sorted_contents(b"Banana\napple\nApricot\n", true, false);
        assert_eq!(sorted, b"apple\nApricot\nBanana\n");
    }

    /// Lines that compare equal ignoring case keep their input order.
    #[test]
    fn test_sorted_contents_ignore_case_is_stable_for_equal_keys() {
        let sorted = sorted_contents(b"Apple\napple\n", true, false);
        assert_eq!(sorted, b"Apple\napple\n");
    }

    /// With `unique`, repeated lines are collapsed into one.
    #[test]
    fn test_sorted_contents_unique() {
        let sorted = sorted_contents(b"beta\nalpha\nbeta\n", false, true);
        assert_eq!(sorted, b"alpha\nbeta\n");
    }

    /// An unsorted file is rewritten and reported with exit code 1.
    #[tokio::test]
    async fn test_sort_file_modifies_unsorted_file() -> Result<()> {
        let workspace = tempdir()?;
        let on_disk = create_test_file(&workspace, "allowlist.txt", b"beta\nalpha\n").await?;
        let name = PathBuf::from("allowlist.txt");

        let (status, message) = sort_file(workspace.path(), &name, false, false).await?;

        assert_eq!(status, 1);
        assert_eq!(String::from_utf8(message)?, "Sorting allowlist.txt\n");
        assert_eq!(fs_err::tokio::read(&on_disk).await?, b"alpha\nbeta\n");

        Ok(())
    }

    /// An already sorted file is left alone and reported with exit code 0.
    #[tokio::test]
    async fn test_sort_file_keeps_sorted_file() -> Result<()> {
        let workspace = tempdir()?;
        let on_disk = create_test_file(&workspace, "allowlist.txt", b"alpha\nbeta\n").await?;
        let name = PathBuf::from("allowlist.txt");

        let (status, message) = sort_file(workspace.path(), &name, false, false).await?;

        assert_eq!(status, 0);
        assert!(message.is_empty());
        assert_eq!(fs_err::tokio::read(&on_disk).await?, b"alpha\nbeta\n");

        Ok(())
    }
}

crates/prek/src/hooks/pre_commit_hooks/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod check_vcs_permalinks;
1616
mod check_xml;
1717
mod check_yaml;
1818
mod detect_private_key;
19+
mod file_contents_sorter;
1920
mod fix_byte_order_marker;
2021
mod fix_end_of_file;
2122
mod fix_trailing_whitespace;
@@ -33,6 +34,7 @@ pub(crate) use check_vcs_permalinks::check_vcs_permalinks;
3334
pub(crate) use check_xml::check_xml;
3435
pub(crate) use check_yaml::check_yaml;
3536
pub(crate) use detect_private_key::detect_private_key;
37+
pub(crate) use file_contents_sorter::file_contents_sorter;
3638
pub(crate) use fix_byte_order_marker::fix_byte_order_marker;
3739
pub(crate) use fix_end_of_file::fix_end_of_file;
3840
pub(crate) use fix_trailing_whitespace::fix_trailing_whitespace;
@@ -47,6 +49,7 @@ pub(crate) enum PreCommitHooks {
4749
CheckCaseConflict,
4850
CheckExecutablesHaveShebangs,
4951
CheckVcsPermalinks,
52+
FileContentsSorter,
5053
EndOfFileFixer,
5154
FixByteOrderMarker,
5255
CheckJson,
@@ -79,6 +82,7 @@ impl PreCommitHooks {
7982
check_executables_have_shebangs(hook, filenames).await
8083
}
8184
Self::CheckVcsPermalinks => check_vcs_permalinks(hook, filenames).await,
85+
Self::FileContentsSorter => file_contents_sorter(hook, filenames).await,
8286
Self::EndOfFileFixer => fix_end_of_file(hook, filenames).await,
8387
Self::FixByteOrderMarker => fix_byte_order_marker(hook, filenames).await,
8488
Self::CheckJson => check_json(hook, filenames).await,

crates/prek/tests/builtin_hooks.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,65 @@ fn end_of_file_fixer_hook() -> Result<()> {
149149
Ok(())
150150
}
151151

152+
#[test]
153+
fn file_contents_sorter_hook() -> Result<()> {
154+
let context = TestContext::new();
155+
context.init_project();
156+
157+
context.write_pre_commit_config(indoc::indoc! {r"
158+
repos:
159+
- repo: builtin
160+
hooks:
161+
- id: file-contents-sorter
162+
files: ^allowlist\.txt$
163+
args: [--ignore-case]
164+
"});
165+
166+
let cwd = context.work_dir();
167+
cwd.child("allowlist.txt")
168+
.write_str("Banana\n\napple\nApricot\n")?;
169+
cwd.child("ignored.txt").write_str("zebra\nant\n")?;
170+
171+
context.git_add(".");
172+
173+
cmd_snapshot!(context.filters(), context.run(), @r"
174+
success: false
175+
exit_code: 1
176+
----- stdout -----
177+
file contents sorter.....................................................Failed
178+
- hook id: file-contents-sorter
179+
- exit code: 1
180+
- files were modified by this hook
181+
182+
Sorting allowlist.txt
183+
184+
----- stderr -----
185+
");
186+
187+
assert_snapshot!(context.read("allowlist.txt"), @r"
188+
apple
189+
Apricot
190+
Banana
191+
");
192+
assert_snapshot!(context.read("ignored.txt"), @r"
193+
zebra
194+
ant
195+
");
196+
197+
context.git_add(".");
198+
199+
cmd_snapshot!(context.filters(), context.run(), @r"
200+
success: true
201+
exit_code: 0
202+
----- stdout -----
203+
file contents sorter.....................................................Passed
204+
205+
----- stderr -----
206+
");
207+
208+
Ok(())
209+
}
210+
152211
#[test]
153212
fn check_yaml_hook() -> Result<()> {
154213
let context = TestContext::new();

crates/prek/tests/list_builtins.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ fn list_builtins_basic() {
2424
check-yaml
2525
detect-private-key
2626
end-of-file-fixer
27+
file-contents-sorter
2728
fix-byte-order-marker
2829
mixed-line-ending
2930
no-commit-to-branch
@@ -83,6 +84,9 @@ fn list_builtins_verbose() {
8384
end-of-file-fixer
8485
ensures that a file is either empty, or ends with one newline.
8586
87+
file-contents-sorter
88+
sorts the lines in specified files (defaults to alphabetical).
89+
8690
fix-byte-order-marker
8791
removes utf-8 byte order marker.
8892
@@ -178,6 +182,11 @@ fn list_builtins_json() {
178182
"name": "fix end of files",
179183
"description": "ensures that a file is either empty, or ends with one newline."
180184
},
185+
{
186+
"id": "file-contents-sorter",
187+
"name": "file contents sorter",
188+
"description": "sorts the lines in specified files (defaults to alphabetical)."
189+
},
181190
{
182191
"id": "fix-byte-order-marker",
183192
"name": "fix utf-8 byte order marker",

0 commit comments

Comments
 (0)