Skip to content

Commit 3438c0f

Browse files
committed
Auto merge of #42613 - stepancheg:lossy, r=alexcrichton
Avoid allocations in Display for OsStr and Path #38879
2 parents 1169a1f + ac96fd7 commit 3438c0f

File tree

12 files changed

+450 -134 lines changed

src/liballoc/string.rs

+22 -99
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,8 @@ use core::hash;
6161
use core::iter::{FromIterator, FusedIterator};
6262
use core::ops::{self, Add, AddAssign, Index, IndexMut};
6363
use core::ptr;
64-
use core::str as core_str;
6564
use core::str::pattern::Pattern;
65+
use std_unicode::lossy;
6666
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
6767

6868
use borrow::{Cow, ToOwned};
@@ -533,111 +533,34 @@ impl String {
533533
/// ```
534534
#[stable(feature = "rust1", since = "1.0.0")]
535535
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
536-
let mut i;
537-
match str::from_utf8(v) {
538-
Ok(s) => return Cow::Borrowed(s),
539-
Err(e) => i = e.valid_up_to(),
540-
}
536+
let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();
541537

542-
const TAG_CONT_U8: u8 = 128;
543-
const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
544-
let total = v.len();
545-
fn unsafe_get(xs: &[u8], i: usize) -> u8 {
546-
unsafe { *xs.get_unchecked(i) }
547-
}
548-
fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
549-
if i >= total { 0 } else { unsafe_get(xs, i) }
550-
}
538+
let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
539+
let lossy::Utf8LossyChunk { valid, broken } = chunk;
540+
if valid.len() == v.len() {
541+
debug_assert!(broken.is_empty());
542+
return Cow::Borrowed(valid);
543+
}
544+
(valid, broken)
545+
} else {
546+
return Cow::Borrowed("");
547+
};
551548

552-
let mut res = String::with_capacity(total);
549+
const REPLACEMENT: &'static str = "\u{FFFD}";
553550

554-
if i > 0 {
555-
unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) };
551+
let mut res = String::with_capacity(v.len());
552+
res.push_str(first_valid);
553+
if !first_broken.is_empty() {
554+
res.push_str(REPLACEMENT);
556555
}
557556

558-
// subseqidx is the index of the first byte of the subsequence we're
559-
// looking at. It's used to copy a bunch of contiguous good codepoints
560-
// at once instead of copying them one by one.
561-
let mut subseqidx = i;
562-
563-
while i < total {
564-
let i_ = i;
565-
let byte = unsafe_get(v, i);
566-
i += 1;
567-
568-
macro_rules! error { () => ({
569-
unsafe {
570-
if subseqidx != i_ {
571-
res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]);
572-
}
573-
subseqidx = i;
574-
res.as_mut_vec().extend_from_slice(REPLACEMENT);
575-
}
576-
})}
577-
578-
if byte < 128 {
579-
// subseqidx handles this
580-
} else {
581-
let w = core_str::utf8_char_width(byte);
582-
583-
match w {
584-
2 => {
585-
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
586-
error!();
587-
continue;
588-
}
589-
i += 1;
590-
}
591-
3 => {
592-
match (byte, safe_get(v, i, total)) {
593-
(0xE0, 0xA0...0xBF) => (),
594-
(0xE1...0xEC, 0x80...0xBF) => (),
595-
(0xED, 0x80...0x9F) => (),
596-
(0xEE...0xEF, 0x80...0xBF) => (),
597-
_ => {
598-
error!();
599-
continue;
600-
}
601-
}
602-
i += 1;
603-
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
604-
error!();
605-
continue;
606-
}
607-
i += 1;
608-
}
609-
4 => {
610-
match (byte, safe_get(v, i, total)) {
611-
(0xF0, 0x90...0xBF) => (),
612-
(0xF1...0xF3, 0x80...0xBF) => (),
613-
(0xF4, 0x80...0x8F) => (),
614-
_ => {
615-
error!();
616-
continue;
617-
}
618-
}
619-
i += 1;
620-
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
621-
error!();
622-
continue;
623-
}
624-
i += 1;
625-
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
626-
error!();
627-
continue;
628-
}
629-
i += 1;
630-
}
631-
_ => {
632-
error!();
633-
continue;
634-
}
635-
}
557+
for lossy::Utf8LossyChunk { valid, broken } in iter {
558+
res.push_str(valid);
559+
if !broken.is_empty() {
560+
res.push_str(REPLACEMENT);
636561
}
637562
}
638-
if subseqidx < total {
639-
unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) };
640-
}
563+
641564
Cow::Owned(res)
642565
}
643566

src/libstd/ffi/os_str.rs

+12 -6
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
// except according to those terms.
1010

1111
use borrow::{Borrow, Cow};
12-
use fmt::{self, Debug};
12+
use fmt;
1313
use mem;
1414
use ops;
1515
use cmp;
@@ -312,8 +312,8 @@ impl Default for OsString {
312312
}
313313

314314
#[stable(feature = "rust1", since = "1.0.0")]
315-
impl Debug for OsString {
316-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
315+
impl fmt::Debug for OsString {
316+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
317317
fmt::Debug::fmt(&**self, formatter)
318318
}
319319
}
@@ -669,9 +669,15 @@ impl Hash for OsStr {
669669
}
670670

671671
#[stable(feature = "rust1", since = "1.0.0")]
672-
impl Debug for OsStr {
673-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
674-
self.inner.fmt(formatter)
672+
impl fmt::Debug for OsStr {
673+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
674+
fmt::Debug::fmt(&self.inner, formatter)
675+
}
676+
}
677+
678+
impl OsStr {
679+
pub(crate) fn display(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
680+
fmt::Display::fmt(&self.inner, formatter)
675681
}
676682
}
677683

src/libstd/path.rs

+4 -4
Original file line number | Diff line number | Diff line change
@@ -2281,8 +2281,8 @@ impl AsRef<OsStr> for Path {
22812281

22822282
#[stable(feature = "rust1", since = "1.0.0")]
22832283
impl fmt::Debug for Path {
2284-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
2285-
self.inner.fmt(formatter)
2284+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2285+
fmt::Debug::fmt(&self.inner, formatter)
22862286
}
22872287
}
22882288

@@ -2314,14 +2314,14 @@ pub struct Display<'a> {
23142314
#[stable(feature = "rust1", since = "1.0.0")]
23152315
impl<'a> fmt::Debug for Display<'a> {
23162316
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2317-
fmt::Debug::fmt(&self.path.to_string_lossy(), f)
2317+
fmt::Debug::fmt(&self.path, f)
23182318
}
23192319
}
23202320

23212321
#[stable(feature = "rust1", since = "1.0.0")]
23222322
impl<'a> fmt::Display for Display<'a> {
23232323
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2324-
fmt::Display::fmt(&self.path.to_string_lossy(), f)
2324+
self.path.inner.display(f)
23252325
}
23262326
}
23272327

src/libstd/sys/redox/os_str.rs

+20 -7
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,11 @@
1212
/// a `Vec<u8>`/`[u8]`.
1313
1414
use borrow::Cow;
15-
use fmt::{self, Debug};
15+
use fmt;
1616
use str;
1717
use mem;
1818
use sys_common::{AsInner, IntoInner};
19+
use std_unicode::lossy::Utf8Lossy;
1920

2021
#[derive(Clone, Hash)]
2122
pub struct Buf {
@@ -26,15 +27,27 @@ pub struct Slice {
2627
pub inner: [u8]
2728
}
2829

29-
impl Debug for Slice {
30-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
31-
self.to_string_lossy().fmt(formatter)
30+
impl fmt::Debug for Slice {
31+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
32+
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
3233
}
3334
}
3435

35-
impl Debug for Buf {
36-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
37-
self.as_slice().fmt(formatter)
36+
impl fmt::Display for Slice {
37+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
38+
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
39+
}
40+
}
41+
42+
impl fmt::Debug for Buf {
43+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
44+
fmt::Debug::fmt(self.as_slice(), formatter)
45+
}
46+
}
47+
48+
impl fmt::Display for Buf {
49+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
50+
fmt::Display::fmt(self.as_slice(), formatter)
3851
}
3952
}
4053

src/libstd/sys/unix/os_str.rs

+20 -7
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,11 @@
1212
/// a `Vec<u8>`/`[u8]`.
1313
1414
use borrow::Cow;
15-
use fmt::{self, Debug};
15+
use fmt;
1616
use str;
1717
use mem;
1818
use sys_common::{AsInner, IntoInner};
19+
use std_unicode::lossy::Utf8Lossy;
1920

2021
#[derive(Clone, Hash)]
2122
pub struct Buf {
@@ -26,15 +27,27 @@ pub struct Slice {
2627
pub inner: [u8]
2728
}
2829

29-
impl Debug for Slice {
30-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
31-
self.to_string_lossy().fmt(formatter)
30+
impl fmt::Debug for Slice {
31+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
32+
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
3233
}
3334
}
3435

35-
impl Debug for Buf {
36-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
37-
self.as_slice().fmt(formatter)
36+
impl fmt::Display for Slice {
37+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
38+
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
39+
}
40+
}
41+
42+
impl fmt::Debug for Buf {
43+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
44+
fmt::Debug::fmt(self.as_slice(), formatter)
45+
}
46+
}
47+
48+
impl fmt::Display for Buf {
49+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
50+
fmt::Display::fmt(self.as_slice(), formatter)
3851
}
3952
}
4053

src/libstd/sys/windows/os_str.rs

+19 -7
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
/// wrapper around the "WTF-8" encoding; see the `wtf8` module for more.
1313
1414
use borrow::Cow;
15-
use fmt::{self, Debug};
15+
use fmt;
1616
use sys_common::wtf8::{Wtf8, Wtf8Buf};
1717
use mem;
1818
use sys_common::{AsInner, IntoInner};
@@ -34,19 +34,31 @@ impl AsInner<Wtf8> for Buf {
3434
}
3535
}
3636

37-
impl Debug for Buf {
38-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
39-
self.as_slice().fmt(formatter)
37+
impl fmt::Debug for Buf {
38+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
39+
fmt::Debug::fmt(self.as_slice(), formatter)
40+
}
41+
}
42+
43+
impl fmt::Display for Buf {
44+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
45+
fmt::Display::fmt(self.as_slice(), formatter)
4046
}
4147
}
4248

4349
pub struct Slice {
4450
pub inner: Wtf8
4551
}
4652

47-
impl Debug for Slice {
48-
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
49-
self.inner.fmt(formatter)
53+
impl fmt::Debug for Slice {
54+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
55+
fmt::Debug::fmt(&self.inner, formatter)
56+
}
57+
}
58+
59+
impl fmt::Display for Slice {
60+
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
61+
fmt::Display::fmt(&self.inner, formatter)
5062
}
5163
}
5264

0 commit comments

Comments
 (0)