Skip to content

Commit 238c1e7

Browse files
committed
Auto merge of #124773 - Marcondiro:master, r=joboet
fix #124714 str.to_lowercase sigma handling Hello, This PR fixes issue #124714 about 'Σ' handling in `str.to_lowercase()`. The fix consists of considering the full original string during 'Σ' handling, instead of only the substring left after the optimized ASCII handling. A new test is added to prevent regressions. Thanks!
2 parents 8c7c151 + bbdf972 commit 238c1e7

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

library/alloc/src/str.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -375,14 +375,16 @@ impl str {
375375
// Safety: We have written only valid ASCII to our vec
376376
let mut s = unsafe { String::from_utf8_unchecked(out) };
377377

378-
for (i, c) in rest[..].char_indices() {
378+
for (i, c) in rest.char_indices() {
379379
if c == 'Σ' {
380380
// Σ maps to σ, except at the end of a word where it maps to ς.
381381
// This is the only conditional (contextual) but language-independent mapping
382382
// in `SpecialCasing.txt`,
383383
// so hard-code it rather than have a generic "condition" mechanism.
384384
// See https://github.com/rust-lang/rust/issues/26035
385-
map_uppercase_sigma(rest, i, &mut s)
385+
let out_len = self.len() - rest.len();
386+
let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
387+
s.push(sigma_lowercase);
386388
} else {
387389
match conversions::to_lower(c) {
388390
[a, '\0', _] => s.push(a),
@@ -400,13 +402,13 @@ impl str {
400402
}
401403
return s;
402404

403-
fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
405+
fn map_uppercase_sigma(from: &str, i: usize) -> char {
404406
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
405407
// for the definition of `Final_Sigma`.
406408
debug_assert!('Σ'.len_utf8() == 2);
407409
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
408410
&& !case_ignorable_then_cased(from[i + 2..].chars());
409-
to.push_str(if is_word_final { "ς" } else { "σ" });
411+
if is_word_final { 'ς' } else { 'σ' }
410412
}
411413

412414
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {

library/alloc/tests/str.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,9 @@ fn to_lowercase() {
18481848
assert_eq!("ΑΣ'Α".to_lowercase(), "ασ'α");
18491849
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");
18501850

1851+
// https://github.com/rust-lang/rust/issues/124714
1852+
assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς");
1853+
18511854
// a really long string that has its lowercase form
18521855
// even longer. this tests that implementations don't assume
18531856
// an incorrect upper bound on allocations

0 commit comments

Comments
 (0)