Skip to content

Commit 12e875c

Browse files
authored
Merge pull request #8538 from dekuu5/fix/stat-string-to-osstr
stat: fix mount point handling for non-UTF8 paths
1 parent 5eb8144 commit 12e875c

File tree

3 files changed

+175
-23
lines changed

3 files changed

+175
-23
lines changed

.vscode/cspell.dictionaries/acronyms+names.wordlist.txt

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# * abbreviations / acronyms
2+
aarch
23
AIX
34
ASLR # address space layout randomization
45
AST # abstract syntax tree
@@ -9,35 +10,34 @@ DevOps
910
Ext3
1011
FIFO
1112
FIFOs
13+
flac
1214
FQDN # fully qualified domain name
1315
GID # group ID
1416
GIDs
1517
GNU
1618
GNUEABI
1719
GNUEABIhf
20+
impls
1821
JFS
22+
loongarch
23+
lzma
1924
MSRV # minimum supported rust version
2025
MSVC
2126
NixOS
2227
POSIX
2328
POSIXLY
29+
ReiserFS
2430
RISC
2531
RISCV
2632
RNG # random number generator
2733
RNGs
28-
ReiserFS
2934
Solaris
3035
UID # user ID
3136
UIDs
3237
UUID # universally unique identifier
3338
WASI
3439
WASM
3540
XFS
36-
aarch
37-
flac
38-
impls
39-
lzma
40-
loongarch
4141

4242
# * names
4343
BusyBox
@@ -48,25 +48,23 @@ Deno
4848
EditorConfig
4949
EPEL
5050
FreeBSD
51+
genric
5152
Gmail
52-
GNU
5353
Illumos
5454
Irix
5555
libfuzzer
56-
MS-DOS
57-
MSDOS
5856
MacOS
5957
MinGW
6058
Minix
59+
MS-DOS
60+
MSDOS
6161
NetBSD
6262
Novell
6363
Nushell
6464
OpenBSD
65-
POSIX
6665
PowerPC
6766
SELinux
6867
SkyPack
69-
Solaris
7068
SysV
7169
Xenix
7270
Yargs

src/uu/stat/src/stat.rs

Lines changed: 120 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,56 @@ fn pad_and_print(result: &str, left: bool, width: usize, padding: Padding) {
111111
}
112112
}
113113

114+
/// Pads and prints raw bytes (Unix-specific) or falls back to string printing
115+
///
116+
/// On Unix systems, this preserves non-UTF8 data by printing raw bytes
117+
/// On other platforms, falls back to lossy string conversion
118+
fn pad_and_print_bytes<W: Write>(
119+
mut writer: W,
120+
bytes: &[u8],
121+
left: bool,
122+
width: usize,
123+
precision: Precision,
124+
) -> Result<(), std::io::Error> {
125+
let display_bytes = match precision {
126+
Precision::Number(p) if p < bytes.len() => &bytes[..p],
127+
_ => bytes,
128+
};
129+
130+
let display_len = display_bytes.len();
131+
let padding_needed = width.saturating_sub(display_len);
132+
133+
let (left_pad, right_pad) = if left {
134+
(0, padding_needed)
135+
} else {
136+
(padding_needed, 0)
137+
};
138+
139+
if left_pad > 0 {
140+
print_padding(&mut writer, left_pad)?;
141+
}
142+
writer.write_all(display_bytes)?;
143+
if right_pad > 0 {
144+
print_padding(&mut writer, right_pad)?;
145+
}
146+
147+
Ok(())
148+
}
149+
/// Writes `n` ASCII spaces to `writer`.
///
/// `writer` is generic, so it can be any byte sink, e.g. `std::io::stdout()`
/// or a `Vec<u8>`. `n` is the already-computed padding size.
///
/// # Errors
///
/// Returns the first error reported by `writer`.
fn print_padding<W: Write>(writer: &mut W, n: usize) -> Result<(), std::io::Error> {
    // Emit the padding in chunks taken from a fixed buffer instead of issuing
    // one write per byte; this matters for writers that are not buffered.
    const SPACES: [u8; 64] = [b' '; 64];

    let mut remaining = n;
    while remaining > 0 {
        let chunk = remaining.min(SPACES.len());
        writer.write_all(&SPACES[..chunk])?;
        remaining -= chunk;
    }
    Ok(())
}
159+
114160
#[derive(Debug)]
115-
pub enum OutputType {
161+
pub enum OutputType<'a> {
116162
Str(String),
163+
OsStr(&'a OsString),
117164
Integer(i64),
118165
Unsigned(u64),
119166
UnsignedHex(u64),
@@ -306,6 +353,7 @@ fn print_it(output: &OutputType, flags: Flags, width: usize, precision: Precisio
306353

307354
match output {
308355
OutputType::Str(s) => print_str(s, &flags, width, precision),
356+
OutputType::OsStr(s) => print_os_str(s, &flags, width, precision),
309357
OutputType::Integer(num) => print_integer(*num, &flags, width, precision, padding_char),
310358
OutputType::Unsigned(num) => print_unsigned(*num, &flags, width, precision, padding_char),
311359
OutputType::UnsignedOct(num) => {
@@ -354,6 +402,37 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Precision) {
354402
pad_and_print(s, flags.left, width, Padding::Space);
355403
}
356404

405+
/// Prints a `OsString` value based on the provided flags, width, and precision.
406+
/// for unix it converts it to bytes then tries to print it if failed print the lossy string version
407+
/// for windows, `OsString` uses UTF-16 internally which doesn't map directly to bytes like Unix,
408+
/// so we fall back to lossy string conversion to handle invalid UTF-8 sequences gracefully
409+
///
410+
/// # Arguments
411+
///
412+
/// * `s` - The `OsString` to be printed.
413+
/// * `flags` - A reference to the Flags struct containing formatting flags.
414+
/// * `width` - The width of the field for the printed string.
415+
/// * `precision` - How many digits of precision, if any.
416+
fn print_os_str(s: &OsString, flags: &Flags, width: usize, precision: Precision) {
417+
#[cfg(unix)]
418+
{
419+
use std::os::unix::ffi::OsStrExt;
420+
421+
let bytes = s.as_bytes();
422+
423+
if pad_and_print_bytes(std::io::stdout(), bytes, flags.left, width, precision).is_err() {
424+
// if an error occurred while trying to print bytes fall back to normal lossy string so it can be printed
425+
let fallback_string = s.to_string_lossy();
426+
print_str(&fallback_string, flags, width, precision);
427+
}
428+
}
429+
#[cfg(not(unix))]
430+
{
431+
let lossy_string = s.to_string_lossy();
432+
print_str(&lossy_string, flags, width, precision);
433+
}
434+
}
435+
357436
fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String {
358437
match quoting_style {
359438
QuotingStyle::Locale | QuotingStyle::Shell => {
@@ -890,16 +969,12 @@ impl Stater {
890969
})
891970
}
892971

893-
fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<String> {
972+
fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<&OsString> {
894973
let path = p.as_ref().canonicalize().ok()?;
895-
896-
for root in self.mount_list.as_ref()? {
897-
if path.starts_with(root) {
898-
// TODO: This is probably wrong, we should pass the OsString
899-
return Some(root.to_string_lossy().into_owned());
900-
}
901-
}
902-
None
974+
self.mount_list
975+
.as_ref()?
976+
.iter()
977+
.find(|root| path.starts_with(root))
903978
}
904979

905980
fn exec(&self) -> i32 {
@@ -993,8 +1068,11 @@ impl Stater {
9931068
'h' => OutputType::Unsigned(meta.nlink()),
9941069
// inode number
9951070
'i' => OutputType::Unsigned(meta.ino()),
996-
// mount point: TODO: This should be an OsStr
997-
'm' => OutputType::Str(self.find_mount_point(file).unwrap()),
1071+
// mount point
1072+
'm' => match self.find_mount_point(file) {
1073+
Some(s) => OutputType::OsStr(s),
1074+
None => OutputType::Str(String::new()),
1075+
},
9981076
// file name
9991077
'n' => OutputType::Str(display_name.to_string()),
10001078
// quoted file name with dereference if symbolic link
@@ -1300,6 +1378,8 @@ fn pretty_time(meta: &Metadata, md_time_field: MetadataTimeField) -> String {
13001378

13011379
#[cfg(test)]
13021380
mod tests {
1381+
use crate::{pad_and_print_bytes, print_padding};
1382+
13031383
use super::{Flags, Precision, ScanUtil, Stater, Token, group_num, precision_trunc};
13041384

13051385
#[test]
@@ -1421,4 +1501,32 @@ mod tests {
14211501
assert_eq!(precision_trunc(123.456, Precision::Number(4)), "123.4560");
14221502
assert_eq!(precision_trunc(123.456, Precision::Number(5)), "123.45600");
14231503
}
1504+
/// Checks width, alignment, and precision handling of `pad_and_print_bytes`
/// using input that is not valid UTF-8.
#[test]
fn test_pad_and_print_bytes() {
    // Deliberately invalid UTF-8 so a lossy conversion would be detectable.
    const RAW: &[u8] = b"\x80\xFF\x80";

    let render = |left: bool, width: usize, precision: Precision| -> Vec<u8> {
        let mut buffer = Vec::new();
        pad_and_print_bytes(&mut buffer, RAW, left, width, precision).unwrap();
        buffer
    };

    // width equal to the data length: no padding
    assert_eq!(render(false, 3, Precision::NotSpecified), b"\x80\xFF\x80");

    // right-aligned: two pad bytes are written before the data
    assert_eq!(render(false, 5, Precision::NotSpecified), b"  \x80\xFF\x80");

    // left-aligned: two pad bytes are written after the data
    assert_eq!(render(true, 5, Precision::NotSpecified), b"\x80\xFF\x80  ");

    // precision shorter than the data truncates it
    assert_eq!(render(false, 0, Precision::Number(2)), b"\x80\xFF");

    // padding is computed from the truncated length
    assert_eq!(render(false, 4, Precision::Number(2)), b"  \x80\xFF");
    assert_eq!(render(true, 4, Precision::Number(2)), b"\x80\xFF  ");

    // precision longer than the data leaves it untouched
    assert_eq!(render(false, 0, Precision::Number(10)), b"\x80\xFF\x80");

    // zero precision prints padding only
    assert_eq!(render(false, 2, Precision::Number(0)), b"  ");
}
1525+
/// Checks that `print_padding` writes exactly the requested number of spaces.
#[test]
fn test_print_padding() {
    let mut buffer = Vec::new();
    print_padding(&mut buffer, 5).unwrap();
    assert_eq!(buffer, b"     ");

    // a zero-length request writes nothing
    let mut buffer = Vec::new();
    print_padding(&mut buffer, 0).unwrap();
    assert!(buffer.is_empty());
}
14241532
}

tests/by-util/test_stat.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,3 +514,49 @@ fn test_stat_selinux() {
514514
let s: Vec<_> = result.stdout_str().split(':').collect();
515515
assert!(s.len() == 4);
516516
}
517+
/// `%m` on the root directory must report `/` as the mount point.
#[cfg(unix)]
#[test]
fn test_mount_point_basic() {
    let scenario = TestScenario::new(util_name!());
    let run = scenario.ucmd().args(&["-c", "%m", "/"]).succeeds();

    let mount_point = run.stdout_str().trim();
    assert!(!mount_point.is_empty(), "Mount point should not be empty");
    assert_eq!(mount_point, "/");
}
527+
/// `%15m` must right-align and `%-15m` must left-align the mount point in a
/// field of width 15.
#[cfg(unix)]
#[test]
fn test_mount_point_width_and_alignment() {
    let ts = TestScenario::new(util_name!());

    // Unpadded value used to build the expected output of the padded runs.
    let plain = ts.ucmd().args(&["-c", "%m", "/"]).succeeds();
    let mount_point = plain.stdout_str().trim_end_matches('\n').to_owned();
    assert!(!mount_point.is_empty(), "Mount point should not be empty");

    // NOTE: `format!` pads by characters; this matches byte-wise padding as long
    // as the mount point of `/` is ASCII.

    // Right-aligned, width 15
    let result = ts.ucmd().args(&["-c", "%15m", "/"]).succeeds();
    assert_eq!(
        result.stdout_str(),
        format!("{mount_point:>15}\n"),
        "Output should be padded to width 15"
    );

    // Left-aligned, width 15
    let result = ts.ucmd().args(&["-c", "%-15m", "/"]).succeeds();
    assert_eq!(
        result.stdout_str(),
        format!("{mount_point:<15}\n"),
        "Output should be padded to width 15 (left-aligned)"
    );
}
550+
/// `%m` must work alongside other format specifiers in the same format string.
#[cfg(unix)]
#[test]
fn test_mount_point_combined_with_other_specifiers() {
    let scenario = TestScenario::new(util_name!());
    let run = scenario
        .ucmd()
        .args(&["-c", "%m %n %s", "/bin/sh"])
        .succeeds();

    // One whitespace-separated field is expected per specifier.
    let field_count = run.stdout_str().split_whitespace().count();
    assert!(
        field_count >= 3,
        "Should print mount point, file name, and size"
    );
}

0 commit comments

Comments
 (0)