Skip to content

Commit 48615a6

Browse files
committed
std: Account for CRLF in {str, BufRead}::lines
This commit is an implementation of [RFC 1212][rfc], which tweaks the behavior of the `str::lines` and `BufRead::lines` iterators. Both iterators now account for `\r\n` sequences in addition to `\n`, allowing for less surprising behavior across platforms (especially in the `BufRead` case). Splitting *only* on the `\n` character can still be achieved with `split('\n')` in both cases.

The `str::lines_any` function is also now deprecated, as `str::lines` is a drop-in replacement for it.

[rfc]: https://github.com/rust-lang/rfcs/blob/master/text/1212-line-endings.md

Closes #28032
1 parent 35b1454 commit 48615a6

File tree

6 files changed

+27
-16
lines changed

6 files changed

+27
-16
lines changed

src/libcollections/str.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -604,14 +604,14 @@ impl str {
604604
UnicodeStr::split_whitespace(self)
605605
}
606606

607-
/// An iterator over the lines of a string, separated by `\n`.
607+
/// An iterator over the lines of a string, separated by `\n` or `\r\n`.
608608
///
609-
/// This does not include the empty string after a trailing `\n`.
609+
/// This does not include the empty string after a trailing newline or CRLF.
610610
///
611611
/// # Examples
612612
///
613613
/// ```
614-
/// let four_lines = "foo\nbar\n\nbaz";
614+
/// let four_lines = "foo\nbar\n\r\nbaz";
615615
/// let v: Vec<&str> = four_lines.lines().collect();
616616
///
617617
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -620,7 +620,7 @@ impl str {
620620
/// Leaving off the trailing character:
621621
///
622622
/// ```
623-
/// let four_lines = "foo\nbar\n\nbaz\n";
623+
/// let four_lines = "foo\r\nbar\n\nbaz\n";
624624
/// let v: Vec<&str> = four_lines.lines().collect();
625625
///
626626
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -654,7 +654,9 @@ impl str {
654654
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
655655
/// ```
656656
#[stable(feature = "rust1", since = "1.0.0")]
657+
#[deprecated(since = "1.4.0", reason = "use lines() instead now")]
657658
#[inline]
659+
#[allow(deprecated)]
658660
pub fn lines_any(&self) -> LinesAny {
659661
core_str::StrExt::lines_any(self)
660662
}

src/libcollectionstest/str.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -964,11 +964,11 @@ fn test_split_whitespace() {
964964

965965
#[test]
966966
fn test_lines() {
967-
let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
967+
let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n";
968968
let lines: Vec<&str> = data.lines().collect();
969969
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
970970

971-
let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
971+
let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
972972
let lines: Vec<&str> = data.lines().collect();
973973
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
974974
}

src/libcore/str/mod.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,7 @@ generate_pattern_iterators! {
827827
/// Created with the method `.lines()`.
828828
#[stable(feature = "rust1", since = "1.0.0")]
829829
#[derive(Clone)]
830-
pub struct Lines<'a>(SplitTerminator<'a, char>);
830+
pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
831831

832832
#[stable(feature = "rust1", since = "1.0.0")]
833833
impl<'a> Iterator for Lines<'a> {
@@ -854,8 +854,10 @@ impl<'a> DoubleEndedIterator for Lines<'a> {
854854

855855
/// Created with the method `.lines_any()`.
856856
#[stable(feature = "rust1", since = "1.0.0")]
857+
#[deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
857858
#[derive(Clone)]
858-
pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>);
859+
#[allow(deprecated)]
860+
pub struct LinesAny<'a>(Lines<'a>);
859861

860862
/// A nameable, clonable fn type
861863
#[derive(Clone)]
@@ -887,6 +889,7 @@ impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
887889
}
888890

889891
#[stable(feature = "rust1", since = "1.0.0")]
892+
#[allow(deprecated)]
890893
impl<'a> Iterator for LinesAny<'a> {
891894
type Item = &'a str;
892895

@@ -902,6 +905,7 @@ impl<'a> Iterator for LinesAny<'a> {
902905
}
903906

904907
#[stable(feature = "rust1", since = "1.0.0")]
908+
#[allow(deprecated)]
905909
impl<'a> DoubleEndedIterator for LinesAny<'a> {
906910
#[inline]
907911
fn next_back(&mut self) -> Option<&'a str> {
@@ -1289,6 +1293,7 @@ pub trait StrExt {
12891293
fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
12901294
where P::Searcher: ReverseSearcher<'a>;
12911295
fn lines(&self) -> Lines;
1296+
#[allow(deprecated)]
12921297
fn lines_any(&self) -> LinesAny;
12931298
fn char_len(&self) -> usize;
12941299
fn slice_chars(&self, begin: usize, end: usize) -> &str;
@@ -1428,12 +1433,13 @@ impl StrExt for str {
14281433
}
14291434
#[inline]
14301435
fn lines(&self) -> Lines {
1431-
Lines(self.split_terminator('\n'))
1436+
Lines(self.split_terminator('\n').map(LinesAnyMap))
14321437
}
14331438

14341439
#[inline]
1440+
#[allow(deprecated)]
14351441
fn lines_any(&self) -> LinesAny {
1436-
LinesAny(self.lines().map(LinesAnyMap))
1442+
LinesAny(self.lines())
14371443
}
14381444

14391445
#[inline]

src/librustdoc/passes.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ pub fn collapse_docs(krate: clean::Crate) -> plugins::PluginResult {
308308
}
309309

310310
pub fn unindent(s: &str) -> String {
311-
let lines = s.lines_any().collect::<Vec<&str> >();
311+
let lines = s.lines().collect::<Vec<&str> >();
312312
let mut saw_first_line = false;
313313
let mut saw_second_line = false;
314314
let min_indent = lines.iter().fold(usize::MAX, |min_indent, line| {

src/libstd/io/mod.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,7 @@ pub trait BufRead: Read {
14391439
///
14401440
/// The iterator returned from this function will yield instances of
14411441
/// `io::Result<String>`. Each string returned will *not* have a newline
1442-
/// byte (the 0xA byte) at the end.
1442+
/// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end.
14431443
///
14441444
/// # Examples
14451445
///
@@ -1763,6 +1763,9 @@ impl<B: BufRead> Iterator for Lines<B> {
17631763
Ok(_n) => {
17641764
if buf.ends_with("\n") {
17651765
buf.pop();
1766+
if buf.ends_with("\r") {
1767+
buf.pop();
1768+
}
17661769
}
17671770
Some(Ok(buf))
17681771
}
@@ -1834,12 +1837,12 @@ mod tests {
18341837

18351838
#[test]
18361839
fn lines() {
1837-
let buf = Cursor::new(&b"12"[..]);
1840+
let buf = Cursor::new(&b"12\r"[..]);
18381841
let mut s = buf.lines();
1839-
assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
1842+
assert_eq!(s.next().unwrap().unwrap(), "12\r".to_string());
18401843
assert!(s.next().is_none());
18411844

1842-
let buf = Cursor::new(&b"12\n\n"[..]);
1845+
let buf = Cursor::new(&b"12\r\n\n"[..]);
18431846
let mut s = buf.lines();
18441847
assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
18451848
assert_eq!(s.next().unwrap().unwrap(), "".to_string());

src/libsyntax/parse/lexer/comments.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
132132

133133
if comment.starts_with("/*") {
134134
let lines = comment[3..comment.len() - 2]
135-
.lines_any()
135+
.lines()
136136
.map(|s| s.to_string())
137137
.collect::<Vec<String> >();
138138

0 commit comments

Comments
 (0)