@@ -76,17 +76,24 @@ enum EscapeState {
7676 Octal ( EscapeOctal ) ,
7777}
7878
// Iterator state for rendering one or two raw bytes as `\nnn` octal escapes.
79- /// Byte we need to present as escaped octal, in the form of `\nnn`
79+ /// Bytes we need to present as escaped octal, in the form of `\nnn` per byte.
80+ /// Only supports characters up to 2 bytes long in UTF-8.
8081struct EscapeOctal {
// Raw bytes to escape. A lone byte is stored in `c[1]`, with `c[0]` left as 0.
81- c : u8 ,
82+ c : [ u8 ; 2 ] ,
// Which piece (a backslash or digits, for the first or the last byte) comes next.
8283 state : EscapeOctalState ,
// Position of the next octal digit within the current byte: starts at 2
// (most significant) and is decremented down to 0.
83- idx : usize ,
84+ idx : u8 ,
8485}
8586
// Progress marker for the `EscapeOctal` iterator.
8687enum EscapeOctalState {
// Nothing left to emit; `next` returns `None`.
8788 Done ,
88- Backslash ,
89- Value ,
// About to emit the `\` that precedes the digits of the first byte (`c[0]`).
89+ FirstBackslash ,
// Emitting the octal digits of the first byte (`c[0]`).
90+ FirstValue ,
// About to emit the `\` that precedes the digits of the last byte (`c[1]`).
91+ LastBackslash ,
// Emitting the octal digits of the last byte (`c[1]`); followed by `Done`.
92+ LastValue ,
93+ }
94+
/// Returns octal digit number `idx` of `byte`.
///
/// Digit 0 is the least significant one (`byte & 0o7`) and digit 2 is the most
/// significant of the three octal digits a `u8` can have. Positions past the
/// top of the byte (`idx >= 3`) yield `0` rather than overflowing the shift.
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
    // Widen before multiplying so `idx * 3` cannot overflow `u8`, and use a
    // checked shift because shifting a `u8` by 8 or more bits is an overflow
    // (a panic in debug builds).
    byte.checked_shr(u32::from(idx) * 3)
        .map_or(0, |shifted| shifted & 0o7)
}
9198
9299impl Iterator for EscapeOctal {
@@ -95,12 +102,26 @@ impl Iterator for EscapeOctal {
95102 fn next ( & mut self ) -> Option < char > {
96103 match self . state {
97104 EscapeOctalState :: Done => None ,
98- EscapeOctalState :: Backslash => {
99- self . state = EscapeOctalState :: Value ;
105+ EscapeOctalState :: FirstBackslash => {
106+ self . state = EscapeOctalState :: FirstValue ;
100107 Some ( '\\' )
101108 }
102- EscapeOctalState :: Value => {
103- let octal_digit = ( ( self . c ) >> ( self . idx * 3 ) ) & 0o7 ;
109+ EscapeOctalState :: LastBackslash => {
110+ self . state = EscapeOctalState :: LastValue ;
111+ Some ( '\\' )
112+ }
113+ EscapeOctalState :: FirstValue => {
114+ let octal_digit = byte_to_octal_digit ( self . c [ 0 ] , self . idx ) ;
115+ if self . idx == 0 {
116+ self . state = EscapeOctalState :: LastBackslash ;
117+ self . idx = 2 ;
118+ } else {
119+ self . idx -= 1 ;
120+ }
121+ Some ( from_digit ( octal_digit. into ( ) , 8 ) . unwrap ( ) )
122+ }
123+ EscapeOctalState :: LastValue => {
124+ let octal_digit = byte_to_octal_digit ( self . c [ 1 ] , self . idx ) ;
104125 if self . idx == 0 {
105126 self . state = EscapeOctalState :: Done ;
106127 } else {
@@ -113,11 +134,25 @@ impl Iterator for EscapeOctal {
113134}
114135
// Constructors for the octal-escape iterator.
115136impl EscapeOctal {
116- fn from ( c : u8 ) -> Self {
// Builds the escaper for the UTF-8 encoding of `c`.
// A one-byte char is delegated to `from_byte`; otherwise the char is encoded
// into a two-byte buffer and both bytes are escaped, starting with the first.
// NOTE: `encode_utf8` panics when the buffer is too small, so a char needing
// 3 or 4 bytes would panic here. The visible callers only pass chars for
// which `is_control()` is true.
137+ fn from_char ( c : char ) -> Self {
138+ if c. len_utf8 ( ) == 1 {
139+ return Self :: from_byte ( c as u8 ) ;
140+ }
141+
142+ let mut buf = [ 0 ; 2 ] ;
143+ let _s = c. encode_utf8 ( & mut buf) ;
144+ Self {
145+ c : buf,
146+ idx : 2 ,
147+ state : EscapeOctalState :: FirstBackslash ,
148+ }
149+ }
150+
// Builds the escaper for a single raw byte. The byte goes in the last slot and
// the state starts at `LastBackslash`, so the first-byte states are skipped.
151+ fn from_byte ( b : u8 ) -> Self {
117152 Self {
118- c,
153+ c : [ 0 , b ] ,
119154 idx : 2 ,
120- state : EscapeOctalState :: Backslash ,
155+ state : EscapeOctalState :: LastBackslash ,
121156 }
122157 }
123158}
@@ -131,7 +166,7 @@ impl EscapedChar {
131166
132167 fn new_octal ( b : u8 ) -> Self {
133168 Self {
134- state : EscapeState :: Octal ( EscapeOctal :: from ( b) ) ,
169+ state : EscapeState :: Octal ( EscapeOctal :: from_byte ( b) ) ,
135170 }
136171 }
137172
@@ -159,7 +194,7 @@ impl EscapedChar {
159194 _ => Char ( ' ' ) ,
160195 } ,
161196 ':' if dirname => Backslash ( ':' ) ,
162- _ if c. is_ascii_control ( ) => Octal ( EscapeOctal :: from ( c as u8 ) ) ,
197+ _ if c. is_control ( ) => Octal ( EscapeOctal :: from_char ( c ) ) ,
163198 _ => Char ( c) ,
164199 } ;
165200 Self { state : init_state }
@@ -176,11 +211,11 @@ impl EscapedChar {
176211 '\x0B' => Backslash ( 'v' ) ,
177212 '\x0C' => Backslash ( 'f' ) ,
178213 '\r' => Backslash ( 'r' ) ,
179- '\x00' ..='\x1F' | '\x7F' => Octal ( EscapeOctal :: from ( c as u8 ) ) ,
180214 '\'' => match quotes {
181215 Quotes :: Single => Backslash ( '\'' ) ,
182216 _ => Char ( '\'' ) ,
183217 } ,
218+ _ if c. is_control ( ) => Octal ( EscapeOctal :: from_char ( c) ) ,
184219 _ if SPECIAL_SHELL_CHARS . contains ( c) => ForceQuote ( c) ,
185220 _ => Char ( c) ,
186221 } ;
@@ -559,10 +594,10 @@ mod tests {
559594 ( "\" one_two\" " , "c" ) ,
560595 ( "one_two" , "shell" ) ,
561596 ( "one_two" , "shell-show" ) ,
562- ( "\ ' one_two\ ' " , "shell-always" ) ,
563- ( "\ ' one_two\ ' " , "shell-always-show" ) ,
597+ ( "'one_two'" , "shell-always" ) ,
598+ ( "'one_two'" , "shell-always-show" ) ,
564599 ( "one_two" , "shell-escape" ) ,
565- ( "\ ' one_two\ ' " , "shell-escape-always" ) ,
600+ ( "'one_two'" , "shell-escape-always" ) ,
566601 ] ,
567602 ) ;
568603 }
@@ -576,12 +611,12 @@ mod tests {
576611 ( "one two" , "literal-show" ) ,
577612 ( "one\\ two" , "escape" ) ,
578613 ( "\" one two\" " , "c" ) ,
579- ( "\ ' one two\ ' " , "shell" ) ,
580- ( "\ ' one two\ ' " , "shell-show" ) ,
581- ( "\ ' one two\ ' " , "shell-always" ) ,
582- ( "\ ' one two\ ' " , "shell-always-show" ) ,
583- ( "\ ' one two\ ' " , "shell-escape" ) ,
584- ( "\ ' one two\ ' " , "shell-escape-always" ) ,
614+ ( "'one two'" , "shell" ) ,
615+ ( "'one two'" , "shell-show" ) ,
616+ ( "'one two'" , "shell-always" ) ,
617+ ( "'one two'" , "shell-always-show" ) ,
618+ ( "'one two'" , "shell-escape" ) ,
619+ ( "'one two'" , "shell-escape-always" ) ,
585620 ] ,
586621 ) ;
587622
@@ -623,7 +658,7 @@ mod tests {
623658
624659 // One single quote
625660 check_names (
626- "one\ ' two" ,
661+ "one'two" ,
627662 & [
628663 ( "one'two" , "literal" ) ,
629664 ( "one'two" , "literal-show" ) ,
@@ -709,7 +744,7 @@ mod tests {
709744 ] ,
710745 ) ;
711746
712- // The first 16 control characters. NUL is also included, even though it is of
747+ // The first 16 ASCII control characters. NUL is also included, even though it is of
713748 // no importance for file names.
714749 check_names (
715750 "\x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0A \x0B \x0C \x0D \x0E \x0F " ,
@@ -748,7 +783,7 @@ mod tests {
748783 ] ,
749784 ) ;
750785
751- // The last 16 control characters.
786+ // The last 16 ASCII control characters.
752787 check_names (
753788 "\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F " ,
754789 & [
@@ -802,6 +837,42 @@ mod tests {
802837 ( "''$'\\ 177'" , "shell-escape-always" ) ,
803838 ] ,
804839 ) ;
840+
841+ // The first 16 non-ASCII (C1) control characters, U+0080 through U+008F.
842+ let test_str = std:: str:: from_utf8 ( b"\xC2 \x80 \xC2 \x81 \xC2 \x82 \xC2 \x83 \xC2 \x84 \xC2 \x85 \xC2 \x86 \xC2 \x87 \xC2 \x88 \xC2 \x89 \xC2 \x8A \xC2 \x8B \xC2 \x8C \xC2 \x8D \xC2 \x8E \xC2 \x8F " ) . unwrap ( ) ;
843+ check_names (
844+ test_str,
845+ & [
846+ ( "????????????????" , "literal" ) ,
847+ ( test_str, "literal-show" ) ,
848+ ( "\\ 302\\ 200\\ 302\\ 201\\ 302\\ 202\\ 302\\ 203\\ 302\\ 204\\ 302\\ 205\\ 302\\ 206\\ 302\\ 207\\ 302\\ 210\\ 302\\ 211\\ 302\\ 212\\ 302\\ 213\\ 302\\ 214\\ 302\\ 215\\ 302\\ 216\\ 302\\ 217" , "escape" ) ,
849+ ( "\" \\ 302\\ 200\\ 302\\ 201\\ 302\\ 202\\ 302\\ 203\\ 302\\ 204\\ 302\\ 205\\ 302\\ 206\\ 302\\ 207\\ 302\\ 210\\ 302\\ 211\\ 302\\ 212\\ 302\\ 213\\ 302\\ 214\\ 302\\ 215\\ 302\\ 216\\ 302\\ 217\" " , "c" ) ,
850+ ( "????????????????" , "shell" ) ,
851+ ( test_str, "shell-show" ) ,
852+ ( "'????????????????'" , "shell-always" ) ,
853+ ( & format ! ( "'{}'" , test_str) , "shell-always-show" ) ,
854+ ( "''$'\\ 302\\ 200\\ 302\\ 201\\ 302\\ 202\\ 302\\ 203\\ 302\\ 204\\ 302\\ 205\\ 302\\ 206\\ 302\\ 207\\ 302\\ 210\\ 302\\ 211\\ 302\\ 212\\ 302\\ 213\\ 302\\ 214\\ 302\\ 215\\ 302\\ 216\\ 302\\ 217'" , "shell-escape" ) ,
855+ ( "''$'\\ 302\\ 200\\ 302\\ 201\\ 302\\ 202\\ 302\\ 203\\ 302\\ 204\\ 302\\ 205\\ 302\\ 206\\ 302\\ 207\\ 302\\ 210\\ 302\\ 211\\ 302\\ 212\\ 302\\ 213\\ 302\\ 214\\ 302\\ 215\\ 302\\ 216\\ 302\\ 217'" , "shell-escape-always" ) ,
856+ ] ,
857+ ) ;
858+
859+ // The last 16 non-ASCII (C1) control characters, U+0090 through U+009F.
860+ let test_str = std:: str:: from_utf8 ( b"\xC2 \x90 \xC2 \x91 \xC2 \x92 \xC2 \x93 \xC2 \x94 \xC2 \x95 \xC2 \x96 \xC2 \x97 \xC2 \x98 \xC2 \x99 \xC2 \x9A \xC2 \x9B \xC2 \x9C \xC2 \x9D \xC2 \x9E \xC2 \x9F " ) . unwrap ( ) ;
861+ check_names (
862+ test_str,
863+ & [
864+ ( "????????????????" , "literal" ) ,
865+ ( test_str, "literal-show" ) ,
866+ ( "\\ 302\\ 220\\ 302\\ 221\\ 302\\ 222\\ 302\\ 223\\ 302\\ 224\\ 302\\ 225\\ 302\\ 226\\ 302\\ 227\\ 302\\ 230\\ 302\\ 231\\ 302\\ 232\\ 302\\ 233\\ 302\\ 234\\ 302\\ 235\\ 302\\ 236\\ 302\\ 237" , "escape" ) ,
867+ ( "\" \\ 302\\ 220\\ 302\\ 221\\ 302\\ 222\\ 302\\ 223\\ 302\\ 224\\ 302\\ 225\\ 302\\ 226\\ 302\\ 227\\ 302\\ 230\\ 302\\ 231\\ 302\\ 232\\ 302\\ 233\\ 302\\ 234\\ 302\\ 235\\ 302\\ 236\\ 302\\ 237\" " , "c" ) ,
868+ ( "????????????????" , "shell" ) ,
869+ ( test_str, "shell-show" ) ,
870+ ( "'????????????????'" , "shell-always" ) ,
871+ ( & format ! ( "'{}'" , test_str) , "shell-always-show" ) ,
872+ ( "''$'\\ 302\\ 220\\ 302\\ 221\\ 302\\ 222\\ 302\\ 223\\ 302\\ 224\\ 302\\ 225\\ 302\\ 226\\ 302\\ 227\\ 302\\ 230\\ 302\\ 231\\ 302\\ 232\\ 302\\ 233\\ 302\\ 234\\ 302\\ 235\\ 302\\ 236\\ 302\\ 237'" , "shell-escape" ) ,
873+ ( "''$'\\ 302\\ 220\\ 302\\ 221\\ 302\\ 222\\ 302\\ 223\\ 302\\ 224\\ 302\\ 225\\ 302\\ 226\\ 302\\ 227\\ 302\\ 230\\ 302\\ 231\\ 302\\ 232\\ 302\\ 233\\ 302\\ 234\\ 302\\ 235\\ 302\\ 236\\ 302\\ 237'" , "shell-escape-always" ) ,
874+ ] ,
875+ ) ;
805876 }
806877
807878 #[ test]
@@ -1060,7 +1131,7 @@ mod tests {
10601131 ( "one\\ \\ two" , "escape" ) ,
10611132 ( "\" one\\ \\ two\" " , "c" ) ,
10621133 ( "'one\\ two'" , "shell" ) ,
1063- ( "\ ' one\\ two\ ' " , "shell-always" ) ,
1134+ ( "'one\\ two'" , "shell-always" ) ,
10641135 ( "'one\\ two'" , "shell-escape" ) ,
10651136 ( "'one\\ two'" , "shell-escape-always" ) ,
10661137 ] ,
0 commit comments