|
16 | 16 | #![stable(feature = "core_char", since = "1.2.0")]
|
17 | 17 |
|
18 | 18 | use char_private::is_printable;
|
| 19 | +use convert::TryFrom; |
| 20 | +use fmt; |
19 | 21 | use iter::FusedIterator;
|
20 | 22 | use mem::transmute;
|
21 | 23 |
|
@@ -122,12 +124,7 @@ pub const MAX: char = '\u{10ffff}';
|
122 | 124 | #[inline]
|
123 | 125 | #[stable(feature = "rust1", since = "1.0.0")]
|
124 | 126 | pub fn from_u32(i: u32) -> Option<char> {
|
125 |
| - // catch out-of-bounds and surrogates |
126 |
| - if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { |
127 |
| - None |
128 |
| - } else { |
129 |
| - Some(unsafe { from_u32_unchecked(i) }) |
130 |
| - } |
| 127 | + char::try_from(i).ok() |
131 | 128 | }
|
132 | 129 |
|
133 | 130 | /// Converts a `u32` to a `char`, ignoring validity.
|
@@ -175,6 +172,66 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char {
|
175 | 172 | transmute(i)
|
176 | 173 | }
|
177 | 174 |
|
| 175 | +#[stable(feature = "char_convert", since = "1.13.0")] |
| 176 | +impl From<char> for u32 { |
| 177 | + #[inline] |
| 178 | + fn from(c: char) -> Self { |
| 179 | + c as u32 |
| 180 | + } |
| 181 | +} |
| 182 | + |
| 183 | +/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. |
| 184 | +/// |
| 185 | +/// Unicode is designed such that this effectively decodes bytes |
| 186 | +/// with the character encoding that IANA calls ISO-8859-1. |
| 187 | +/// This encoding is compatible with ASCII. |
| 188 | +/// |
| 189 | +/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), |
| 190 | +/// which leaves some "blanks", byte values that are not assigned to any character. |
| 191 | +/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. |
| 192 | +/// |
| 193 | +/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, |
| 194 | +/// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks |
| 195 | +/// to punctuation and various Latin characters. |
| 196 | +/// |
| 197 | +/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) |
| 198 | +/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases |
| 199 | +/// for a superset of Windows-1252 that fills the remaining blanks with corresponding |
| 200 | +/// C0 and C1 control codes. |
| 201 | +#[stable(feature = "char_convert", since = "1.13.0")] |
| 202 | +impl From<u8> for char { |
| 203 | + #[inline] |
| 204 | + fn from(i: u8) -> Self { |
| 205 | + i as char |
| 206 | + } |
| 207 | +} |
| 208 | + |
| 209 | +#[unstable(feature = "try_from", issue = "33417")] |
| 210 | +impl TryFrom<u32> for char { |
| 211 | + type Err = CharTryFromError; |
| 212 | + |
| 213 | + #[inline] |
| 214 | + fn try_from(i: u32) -> Result<Self, Self::Err> { |
| 215 | + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { |
| 216 | + Err(CharTryFromError(())) |
| 217 | + } else { |
| 218 | + Ok(unsafe { from_u32_unchecked(i) }) |
| 219 | + } |
| 220 | + } |
| 221 | +} |
| 222 | + |
| 223 | +/// The error type returned when a conversion from `u32` to `char` fails. |
| 224 | +#[unstable(feature = "try_from", issue = "33417")] |
| 225 | +#[derive(Copy, Clone, Debug, PartialEq, Eq)] |
| 226 | +pub struct CharTryFromError(()); |
| 227 | + |
| 228 | +#[unstable(feature = "try_from", issue = "33417")] |
| 229 | +impl fmt::Display for CharTryFromError { |
| 230 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 231 | + "converted integer out of range for `char`".fmt(f) |
| 232 | + } |
| 233 | +} |
| 234 | + |
178 | 235 | /// Converts a digit in the given radix to a `char`.
|
179 | 236 | ///
|
180 | 237 | /// A 'radix' here is sometimes also called a 'base'. A radix of two
|
|
0 commit comments