@@ -39,7 +39,7 @@ use slice;
39
39
use str;
40
40
use sys_common:: AsInner ;
41
41
42
- const UTF8_REPLACEMENT_CHARACTER : & ' static [ u8 ] = b" \xEF \xBF \xBD ";
42
/// U+FFFD REPLACEMENT CHARACTER as a string slice.
///
/// Written directly by the `Display` impl for `Wtf8` in place of each
/// surrogate, and copied via `.as_bytes()` by the lossy conversion code
/// that overwrites or appends the replacement in a byte buffer.
+ const UTF8_REPLACEMENT_CHARACTER : & ' static str = " \u{FFFD} ";
43
43
44
44
/// A Unicode code point: from U+0000 to U+10FFFF.
45
45
///
@@ -339,7 +339,7 @@ impl Wtf8Buf {
339
339
Some ( ( surrogate_pos, _) ) => {
340
340
pos = surrogate_pos + 3 ;
341
341
self . bytes [ surrogate_pos..pos]
342
- . copy_from_slice ( UTF8_REPLACEMENT_CHARACTER ) ;
342
+ . copy_from_slice ( UTF8_REPLACEMENT_CHARACTER . as_bytes ( ) ) ;
343
343
} ,
344
344
None => return unsafe { String :: from_utf8_unchecked ( self . bytes ) }
345
345
}
@@ -438,6 +438,30 @@ impl fmt::Debug for Wtf8 {
438
438
}
439
439
}
440
440
441
+ impl fmt:: Display for Wtf8 {
442
+ fn fmt ( & self , formatter : & mut fmt:: Formatter ) -> fmt:: Result {
443
+ let wtf8_bytes = & self . bytes ;
444
+ let mut pos = 0 ;
445
+ loop {
446
+ match self . next_surrogate ( pos) {
447
+ Some ( ( surrogate_pos, _) ) => {
448
+ formatter. write_str ( unsafe {
449
+ str:: from_utf8_unchecked ( & wtf8_bytes[ pos .. surrogate_pos] )
450
+ } ) ?;
451
+ formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
452
+ pos = surrogate_pos + 3 ;
453
+ } ,
454
+ None => {
455
+ formatter. write_str ( unsafe {
456
+ str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] )
457
+ } ) ?;
458
+ return Ok ( ( ) ) ;
459
+ }
460
+ }
461
+ }
462
+ }
463
+ }
464
+
441
465
impl Wtf8 {
442
466
/// Creates a WTF-8 slice from a UTF-8 `&str` slice.
443
467
///
@@ -516,13 +540,13 @@ impl Wtf8 {
516
540
let wtf8_bytes = & self . bytes ;
517
541
let mut utf8_bytes = Vec :: with_capacity ( self . len ( ) ) ;
518
542
utf8_bytes. extend_from_slice ( & wtf8_bytes[ ..surrogate_pos] ) ;
519
- utf8_bytes. extend_from_slice ( UTF8_REPLACEMENT_CHARACTER ) ;
543
+ utf8_bytes. extend_from_slice ( UTF8_REPLACEMENT_CHARACTER . as_bytes ( ) ) ;
520
544
let mut pos = surrogate_pos + 3 ;
521
545
loop {
522
546
match self . next_surrogate ( pos) {
523
547
Some ( ( surrogate_pos, _) ) => {
524
548
utf8_bytes. extend_from_slice ( & wtf8_bytes[ pos .. surrogate_pos] ) ;
525
- utf8_bytes. extend_from_slice ( UTF8_REPLACEMENT_CHARACTER ) ;
549
+ utf8_bytes. extend_from_slice ( UTF8_REPLACEMENT_CHARACTER . as_bytes ( ) ) ;
526
550
pos = surrogate_pos + 3 ;
527
551
} ,
528
552
None => {
@@ -1200,6 +1224,20 @@ mod tests {
1200
1224
assert_eq ! ( string. to_string_lossy( ) , expected) ;
1201
1225
}
1202
1226
1227
#[test]
fn wtf8_display() {
    // Renders raw bytes through the `Display` impl of `Wtf8`.
    fn render(raw: &[u8]) -> String {
        let wtf8 = unsafe { Wtf8::from_bytes_unchecked(raw) };
        format!("{}", wtf8)
    }

    // Well-formed UTF-8 must come out unchanged.
    for text in ["", "aé 💩"].iter() {
        assert_eq!(*text, render(text.as_bytes()));
    }

    // A lone surrogate appended to the buffer is shown as U+FFFD.
    let mut with_surrogate = Wtf8Buf::from_str("aé 💩");
    let lone_lead = CodePoint::from_u32(0xD800).unwrap();
    with_surrogate.push(lone_lead);
    assert_eq!("aé 💩�", render(with_surrogate.as_inner()));
}
1240
+
1203
1241
#[ test]
1204
1242
fn wtf8_encode_wide ( ) {
1205
1243
let mut string = Wtf8Buf :: from_str ( "aé " ) ;
0 commit comments