Skip to content

Commit c0bf284

Browse files
authored
Merge branch 'uutils:main' into id_add_ignore_argument
2 parents 299726f + 12e875c commit c0bf284

File tree

7 files changed

+249
-52
lines changed

7 files changed

+249
-52
lines changed

.github/workflows/GnuTests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ env:
2929
TEST_ROOT_FULL_SUMMARY_FILE: 'gnu-root-full-result.json'
3030
TEST_SELINUX_FULL_SUMMARY_FILE: 'selinux-gnu-full-result.json'
3131
TEST_SELINUX_ROOT_FULL_SUMMARY_FILE: 'selinux-root-gnu-full-result.json'
32-
REPO_GNU_REF: "v9.7"
32+
REPO_GNU_REF: "v9.8"
3333

3434
jobs:
3535
native:

.vscode/cspell.dictionaries/acronyms+names.wordlist.txt

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# * abbreviations / acronyms
2+
aarch
23
AIX
34
ASLR # address space layout randomization
45
AST # abstract syntax tree
@@ -9,35 +10,34 @@ DevOps
910
Ext3
1011
FIFO
1112
FIFOs
13+
flac
1214
FQDN # fully qualified domain name
1315
GID # group ID
1416
GIDs
1517
GNU
1618
GNUEABI
1719
GNUEABIhf
20+
impls
1821
JFS
22+
loongarch
23+
lzma
1924
MSRV # minimum supported rust version
2025
MSVC
2126
NixOS
2227
POSIX
2328
POSIXLY
29+
ReiserFS
2430
RISC
2531
RISCV
2632
RNG # random number generator
2733
RNGs
28-
ReiserFS
2934
Solaris
3035
UID # user ID
3136
UIDs
3237
UUID # universally unique identifier
3338
WASI
3439
WASM
3540
XFS
36-
aarch
37-
flac
38-
impls
39-
lzma
40-
loongarch
4141

4242
# * names
4343
BusyBox
@@ -48,25 +48,23 @@ Deno
4848
EditorConfig
4949
EPEL
5050
FreeBSD
51+
genric
5152
Gmail
52-
GNU
5353
Illumos
5454
Irix
5555
libfuzzer
56-
MS-DOS
57-
MSDOS
5856
MacOS
5957
MinGW
6058
Minix
59+
MS-DOS
60+
MSDOS
6161
NetBSD
6262
Novell
6363
Nushell
6464
OpenBSD
65-
POSIX
6665
PowerPC
6766
SELinux
6867
SkyPack
69-
Solaris
7068
SysV
7169
Xenix
7270
Yargs

src/uu/seq/src/seq.rs

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55
// spell-checker:ignore (ToDO) bigdecimal extendedbigdecimal numberparse hexadecimalfloat biguint
6-
use std::ffi::OsString;
6+
use std::ffi::{OsStr, OsString};
77
use std::io::{BufWriter, ErrorKind, Write, stdout};
88

99
use clap::{Arg, ArgAction, Command};
@@ -39,8 +39,8 @@ const ARG_NUMBERS: &str = "numbers";
3939

4040
#[derive(Clone)]
4141
struct SeqOptions<'a> {
42-
separator: String,
43-
terminator: String,
42+
separator: OsString,
43+
terminator: OsString,
4444
equal_width: bool,
4545
format: Option<&'a str>,
4646
}
@@ -105,14 +105,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
105105

106106
let options = SeqOptions {
107107
separator: matches
108-
.get_one::<String>(OPT_SEPARATOR)
109-
.map_or("\n", |s| s.as_str())
110-
.to_string(),
108+
.get_one::<OsString>(OPT_SEPARATOR)
109+
.map_or(OsString::from("\n"), |s| s.to_os_string()),
111110
terminator: matches
112-
.get_one::<String>(OPT_TERMINATOR)
113-
.map(|s| s.as_str())
114-
.unwrap_or("\n")
115-
.to_string(),
111+
.get_one::<OsString>(OPT_TERMINATOR)
112+
.map_or(OsString::from("\n"), |s| s.to_os_string()),
116113
equal_width: matches.get_flag(OPT_EQUAL_WIDTH),
117114
format: matches.get_one::<String>(OPT_FORMAT).map(|s| s.as_str()),
118115
};
@@ -229,13 +226,15 @@ pub fn uu_app() -> Command {
229226
Arg::new(OPT_SEPARATOR)
230227
.short('s')
231228
.long("separator")
232-
.help(translate!("seq-help-separator")),
229+
.help(translate!("seq-help-separator"))
230+
.value_parser(clap::value_parser!(OsString)),
233231
)
234232
.arg(
235233
Arg::new(OPT_TERMINATOR)
236234
.short('t')
237235
.long("terminator")
238-
.help(translate!("seq-help-terminator")),
236+
.help(translate!("seq-help-terminator"))
237+
.value_parser(clap::value_parser!(OsString)),
239238
)
240239
.arg(
241240
Arg::new(OPT_EQUAL_WIDTH)
@@ -267,8 +266,8 @@ fn fast_print_seq(
267266
first: &BigUint,
268267
increment: u64,
269268
last: &BigUint,
270-
separator: &str,
271-
terminator: &str,
269+
separator: &OsStr,
270+
terminator: &OsStr,
272271
padding: usize,
273272
) -> std::io::Result<()> {
274273
// Nothing to do, just return.
@@ -305,7 +304,7 @@ fn fast_print_seq(
305304

306305
// Initialize buf with first and separator.
307306
buf[start..num_end].copy_from_slice(first_str.as_bytes());
308-
buf[num_end..].copy_from_slice(separator.as_bytes());
307+
buf[num_end..].copy_from_slice(separator.as_encoded_bytes());
309308

310309
// Normally, if padding is > 0, it should be equal to last_length,
311310
// so start would be == 0, but there are corner cases.
@@ -321,7 +320,7 @@ fn fast_print_seq(
321320
}
322321
// Write the last number without separator, but with terminator.
323322
stdout.write_all(&buf[start..num_end])?;
324-
write!(stdout, "{terminator}")?;
323+
stdout.write_all(terminator.as_encoded_bytes())?;
325324
stdout.flush()?;
326325
Ok(())
327326
}
@@ -337,8 +336,8 @@ fn done_printing<T: Zero + PartialOrd>(next: &T, increment: &T, last: &T) -> boo
337336
/// Arbitrary precision decimal number code path ("slow" path)
338337
fn print_seq(
339338
range: RangeFloat,
340-
separator: &str,
341-
terminator: &str,
339+
separator: &OsStr,
340+
terminator: &OsStr,
342341
format: &Format<num_format::Float, &ExtendedBigDecimal>,
343342
fast_allowed: bool,
344343
padding: usize, // Used by fast path only
@@ -375,15 +374,15 @@ fn print_seq(
375374
let mut is_first_iteration = true;
376375
while !done_printing(&value, &increment, &last) {
377376
if !is_first_iteration {
378-
stdout.write_all(separator.as_bytes())?;
377+
stdout.write_all(separator.as_encoded_bytes())?;
379378
}
380379
format.fmt(&mut stdout, &value)?;
381380
// TODO Implement augmenting addition.
382381
value = value + increment.clone();
383382
is_first_iteration = false;
384383
}
385384
if !is_first_iteration {
386-
stdout.write_all(terminator.as_bytes())?;
385+
stdout.write_all(terminator.as_encoded_bytes())?;
387386
}
388387
stdout.flush()?;
389388
Ok(())

src/uu/stat/src/stat.rs

Lines changed: 120 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,56 @@ fn pad_and_print(result: &str, left: bool, width: usize, padding: Padding) {
111111
}
112112
}
113113

114+
/// Pads and writes raw bytes to `writer`, truncating them to `precision` bytes first when one is given
115+
///
116+
/// On Unix systems, this preserves non-UTF8 data by printing raw bytes
117+
/// On other platforms, falls back to lossy string conversion
118+
fn pad_and_print_bytes<W: Write>(
119+
mut writer: W,
120+
bytes: &[u8],
121+
left: bool,
122+
width: usize,
123+
precision: Precision,
124+
) -> Result<(), std::io::Error> {
125+
let display_bytes = match precision {
126+
Precision::Number(p) if p < bytes.len() => &bytes[..p],
127+
_ => bytes,
128+
};
129+
130+
let display_len = display_bytes.len();
131+
let padding_needed = width.saturating_sub(display_len);
132+
133+
let (left_pad, right_pad) = if left {
134+
(0, padding_needed)
135+
} else {
136+
(padding_needed, 0)
137+
};
138+
139+
if left_pad > 0 {
140+
print_padding(&mut writer, left_pad)?;
141+
}
142+
writer.write_all(display_bytes)?;
143+
if right_pad > 0 {
144+
print_padding(&mut writer, right_pad)?;
145+
}
146+
147+
Ok(())
148+
}
149+
150+
/// Writes `n` spaces of padding to the writer `W`
151+
/// writer is generic, so it can be any buffer like: `std::io::stdout`
152+
/// n is the calculated padding size
153+
fn print_padding<W: Write>(writer: &mut W, n: usize) -> Result<(), std::io::Error> {
154+
for _ in 0..n {
155+
writer.write_all(b" ")?;
156+
}
157+
Ok(())
158+
}
159+
114160
#[derive(Debug)]
115-
pub enum OutputType {
161+
pub enum OutputType<'a> {
116162
Str(String),
163+
OsStr(&'a OsString),
117164
Integer(i64),
118165
Unsigned(u64),
119166
UnsignedHex(u64),
@@ -306,6 +353,7 @@ fn print_it(output: &OutputType, flags: Flags, width: usize, precision: Precisio
306353

307354
match output {
308355
OutputType::Str(s) => print_str(s, &flags, width, precision),
356+
OutputType::OsStr(s) => print_os_str(s, &flags, width, precision),
309357
OutputType::Integer(num) => print_integer(*num, &flags, width, precision, padding_char),
310358
OutputType::Unsigned(num) => print_unsigned(*num, &flags, width, precision, padding_char),
311359
OutputType::UnsignedOct(num) => {
@@ -354,6 +402,37 @@ fn print_str(s: &str, flags: &Flags, width: usize, precision: Precision) {
354402
pad_and_print(s, flags.left, width, Padding::Space);
355403
}
356404

405+
/// Prints a `OsString` value based on the provided flags, width, and precision.
406+
/// On Unix it converts the value to bytes and prints them; if that write fails, it prints the lossy string version instead.
407+
/// for windows, `OsString` uses UTF-16 internally which doesn't map directly to bytes like Unix,
408+
/// so we fall back to lossy string conversion to handle invalid UTF-8 sequences gracefully
409+
///
410+
/// # Arguments
411+
///
412+
/// * `s` - The `OsString` to be printed.
413+
/// * `flags` - A reference to the Flags struct containing formatting flags.
414+
/// * `width` - The width of the field for the printed string.
415+
/// * `precision` - Optional truncation length, if any; on Unix the output is cut to this many bytes.
416+
fn print_os_str(s: &OsString, flags: &Flags, width: usize, precision: Precision) {
417+
#[cfg(unix)]
418+
{
419+
use std::os::unix::ffi::OsStrExt;
420+
421+
let bytes = s.as_bytes();
422+
423+
if pad_and_print_bytes(std::io::stdout(), bytes, flags.left, width, precision).is_err() {
424+
// if an error occurred while trying to print bytes fall back to normal lossy string so it can be printed
425+
let fallback_string = s.to_string_lossy();
426+
print_str(&fallback_string, flags, width, precision);
427+
}
428+
}
429+
#[cfg(not(unix))]
430+
{
431+
let lossy_string = s.to_string_lossy();
432+
print_str(&lossy_string, flags, width, precision);
433+
}
434+
}
435+
357436
fn quote_file_name(file_name: &str, quoting_style: &QuotingStyle) -> String {
358437
match quoting_style {
359438
QuotingStyle::Locale | QuotingStyle::Shell => {
@@ -890,16 +969,12 @@ impl Stater {
890969
})
891970
}
892971

893-
fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<String> {
972+
fn find_mount_point<P: AsRef<Path>>(&self, p: P) -> Option<&OsString> {
894973
let path = p.as_ref().canonicalize().ok()?;
895-
896-
for root in self.mount_list.as_ref()? {
897-
if path.starts_with(root) {
898-
// TODO: This is probably wrong, we should pass the OsString
899-
return Some(root.to_string_lossy().into_owned());
900-
}
901-
}
902-
None
974+
self.mount_list
975+
.as_ref()?
976+
.iter()
977+
.find(|root| path.starts_with(root))
903978
}
904979

905980
fn exec(&self) -> i32 {
@@ -993,8 +1068,11 @@ impl Stater {
9931068
'h' => OutputType::Unsigned(meta.nlink()),
9941069
// inode number
9951070
'i' => OutputType::Unsigned(meta.ino()),
996-
// mount point: TODO: This should be an OsStr
997-
'm' => OutputType::Str(self.find_mount_point(file).unwrap()),
1071+
// mount point
1072+
'm' => match self.find_mount_point(file) {
1073+
Some(s) => OutputType::OsStr(s),
1074+
None => OutputType::Str(String::new()),
1075+
},
9981076
// file name
9991077
'n' => OutputType::Str(display_name.to_string()),
10001078
// quoted file name with dereference if symbolic link
@@ -1300,6 +1378,8 @@ fn pretty_time(meta: &Metadata, md_time_field: MetadataTimeField) -> String {
13001378

13011379
#[cfg(test)]
13021380
mod tests {
1381+
use crate::{pad_and_print_bytes, print_padding};
1382+
13031383
use super::{Flags, Precision, ScanUtil, Stater, Token, group_num, precision_trunc};
13041384

13051385
#[test]
@@ -1421,4 +1501,32 @@ mod tests {
14211501
assert_eq!(precision_trunc(123.456, Precision::Number(4)), "123.4560");
14221502
assert_eq!(precision_trunc(123.456, Precision::Number(5)), "123.45600");
14231503
}
1504+
1505+
#[test]
1506+
fn test_pad_and_print_bytes() {
1507+
// testing non-utf8 with normal settings
1508+
let mut buffer = Vec::new();
1509+
let bytes = b"\x80\xFF\x80";
1510+
pad_and_print_bytes(&mut buffer, bytes, false, 3, Precision::NotSpecified).unwrap();
1511+
assert_eq!(&buffer, b"\x80\xFF\x80");
1512+
1513+
// testing left padding
1514+
let mut buffer = Vec::new();
1515+
let bytes = b"\x80\xFF\x80";
1516+
pad_and_print_bytes(&mut buffer, bytes, false, 5, Precision::NotSpecified).unwrap();
1517+
assert_eq!(&buffer, b" \x80\xFF\x80");
1518+
1519+
// testing right padding
1520+
let mut buffer = Vec::new();
1521+
let bytes = b"\x80\xFF\x80";
1522+
pad_and_print_bytes(&mut buffer, bytes, true, 5, Precision::NotSpecified).unwrap();
1523+
assert_eq!(&buffer, b"\x80\xFF\x80 ");
1524+
}
1525+
1526+
#[test]
1527+
fn test_print_padding() {
1528+
let mut buffer = Vec::new();
1529+
print_padding(&mut buffer, 5).unwrap();
1530+
assert_eq!(&buffer, b" ");
1531+
}
14241532
}

0 commit comments

Comments
 (0)