Skip to content

Commit 22f5bdc

Browse files
committed
Auto merge of rust-lang#124686 - saethlin:rust-file-footer, r=fmease
Add a footer in FileEncoder and check for it in MemDecoder We have a few reports of ICEs due to decoding failures, where the fault does not lie with the compiler. The goal of this PR is to add some very lightweight and on-by-default validation to the compiler's outputs. If validation fails, we emit a fatal error for rmeta files in general that mentions the path that didn't load, and for incremental compilation artifacts we emit a verbose warning that tries to explain the situation and treat the artifacts as outdated. The validation currently implemented here is very crude, and yet I think we have 11 ICE reports currently open (you can find them by searching issues for `1002111927320821928687967599834759150`) which this simple validation would have detected. The structure of the code changes here should permit the addition of further validation code, such as a checksum, if it is merited. I would like to have code to detect corruption such as reported in rust-lang#124719, but I'm not yet sure how to do that efficiently, and this PR is already a good size. The ICE reports I have in mind that this PR would have smoothed over are: rust-lang#124469 rust-lang#123352 rust-lang#123376 [^1] rust-lang#99763 rust-lang#93900. --- [^1]: This one might be a compiler bug, but even if it is I think the workflow described is pushing the envelope of what we can support. This issue is one of the reasons this warning still asks people to file an issue.
2 parents 5d328a1 + c3a6062 commit 22f5bdc

File tree

16 files changed

+124
-54
lines changed

16 files changed

+124
-54
lines changed

compiler/rustc_codegen_ssa/src/lib.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ pub enum CodegenErrors {
195195
EmptyVersionNumber,
196196
EncodingVersionMismatch { version_array: String, rlink_version: u32 },
197197
RustcVersionMismatch { rustc_version: String },
198+
CorruptFile,
198199
}
199200

200201
pub fn provide(providers: &mut Providers) {
@@ -265,7 +266,9 @@ impl CodegenResults {
265266
});
266267
}
267268

268-
let mut decoder = MemDecoder::new(&data[4..], 0);
269+
let Ok(mut decoder) = MemDecoder::new(&data[4..], 0) else {
270+
return Err(CodegenErrors::CorruptFile);
271+
};
269272
let rustc_version = decoder.read_str();
270273
if rustc_version != sess.cfg_version {
271274
return Err(CodegenErrors::RustcVersionMismatch {

compiler/rustc_driver_impl/messages.ftl

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error}
1010
driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}`
1111
driver_impl_ice_version = rustc {$version} running on {$triple}
1212
13+
driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}`
14+
1315
driver_impl_rlink_empty_version_number = The input does not contain version number
1416
1517
driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}`

compiler/rustc_driver_impl/src/lib.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ mod signal_handler {
9696

9797
use crate::session_diagnostics::{
9898
RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch,
99-
RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead,
99+
RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead,
100100
};
101101

102102
rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
@@ -645,15 +645,17 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) {
645645
match err {
646646
CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType),
647647
CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber),
648-
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess
649-
.dcx()
648+
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx
650649
.emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }),
651650
CodegenErrors::RustcVersionMismatch { rustc_version } => {
652651
dcx.emit_fatal(RLinkRustcVersionMismatch {
653652
rustc_version,
654653
current_version: sess.cfg_version,
655654
})
656655
}
656+
CodegenErrors::CorruptFile => {
657+
dcx.emit_fatal(RlinkCorruptFile { file });
658+
}
657659
};
658660
}
659661
};

compiler/rustc_driver_impl/src/session_diagnostics.rs

+6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> {
3232
#[diag(driver_impl_rlink_no_a_file)]
3333
pub(crate) struct RlinkNotAFile;
3434

35+
#[derive(Diagnostic)]
36+
#[diag(driver_impl_rlink_corrupt_file)]
37+
pub(crate) struct RlinkCorruptFile<'a> {
38+
pub file: &'a std::path::Path,
39+
}
40+
3541
#[derive(Diagnostic)]
3642
#[diag(driver_impl_ice)]
3743
pub(crate) struct Ice;

compiler/rustc_incremental/messages.ftl

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ incremental_cargo_help_2 =
2121
incremental_copy_workproduct_to_cache =
2222
error copying object file `{$from}` to incremental directory as `{$to}`: {$err}
2323
24+
incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant.
25+
2426
incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err}
2527
2628
incremental_create_incr_comp_dir =

compiler/rustc_incremental/src/errors.rs

+6
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> {
306306
pub path: &'a Path,
307307
pub err: std::io::Error,
308308
}
309+
310+
#[derive(Diagnostic)]
311+
#[diag(incremental_corrupt_file)]
312+
pub struct CorruptFile<'a> {
313+
pub path: &'a Path,
314+
}

compiler/rustc_incremental/src/persist/load.rs

+17-4
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
115115

116116
if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result {
117117
// Decode the list of work_products
118-
let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos);
118+
let Ok(mut work_product_decoder) =
119+
MemDecoder::new(&work_products_data[..], start_pos)
120+
else {
121+
sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path });
122+
return LoadResult::DataOutOfDate;
123+
};
119124
let work_products: Vec<SerializedWorkProduct> =
120125
Decodable::decode(&mut work_product_decoder);
121126

@@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
145150
LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
146151
LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err),
147152
LoadResult::Ok { data: (bytes, start_pos) } => {
148-
let mut decoder = MemDecoder::new(&bytes, start_pos);
153+
let Ok(mut decoder) = MemDecoder::new(&bytes, start_pos) else {
154+
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
155+
return LoadResult::DataOutOfDate;
156+
};
149157
let prev_commandline_args_hash = u64::decode(&mut decoder);
150158

151159
if prev_commandline_args_hash != expected_hash {
@@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option<OnDiskCache<'_>> {
181189

182190
let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache");
183191

184-
match load_data(&query_cache_path(sess), sess) {
192+
let path = query_cache_path(sess);
193+
match load_data(&path, sess) {
185194
LoadResult::Ok { data: (bytes, start_pos) } => {
186-
Some(OnDiskCache::new(sess, bytes, start_pos))
195+
let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|()| {
196+
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
197+
OnDiskCache::new_empty(sess.source_map())
198+
});
199+
Some(cache)
187200
}
188201
_ => Some(OnDiskCache::new_empty(sess.source_map())),
189202
}

compiler/rustc_metadata/src/locator.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -853,7 +853,12 @@ fn get_metadata_section<'p>(
853853
slice_owned(mmap, Deref::deref)
854854
}
855855
};
856-
let blob = MetadataBlob(raw_bytes);
856+
let Ok(blob) = MetadataBlob::new(raw_bytes) else {
857+
return Err(MetadataError::LoadFailure(format!(
858+
"corrupt metadata encountered in {}",
859+
filename.display()
860+
)));
861+
};
857862
match blob.check_compatibility(cfg_version) {
858863
Ok(()) => Ok(blob),
859864
Err(None) => Err(MetadataError::LoadFailure(format!(

compiler/rustc_metadata/src/rmeta/decoder.rs

+25-6
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,9 @@ use rustc_span::hygiene::HygieneDecodeContext;
4040
mod cstore_impl;
4141

4242
/// A reference to the raw binary version of crate metadata.
43-
/// A `MetadataBlob` internally is just a reference counted pointer to
44-
/// the actual data, so cloning it is cheap.
45-
#[derive(Clone)]
46-
pub(crate) struct MetadataBlob(pub(crate) OwnedSlice);
43+
/// This struct applies [`MemDecoder`]'s validation when constructed
44+
/// so that later constructions are guaranteed to succeed.
45+
pub(crate) struct MetadataBlob(OwnedSlice);
4746

4847
impl std::ops::Deref for MetadataBlob {
4948
type Target = [u8];
@@ -54,6 +53,19 @@ impl std::ops::Deref for MetadataBlob {
5453
}
5554
}
5655

56+
impl MetadataBlob {
57+
/// Runs the [`MemDecoder`] validation and if it passes, constructs a new [`MetadataBlob`].
58+
pub fn new(slice: OwnedSlice) -> Result<Self, ()> {
59+
if MemDecoder::new(&slice, 0).is_ok() { Ok(Self(slice)) } else { Err(()) }
60+
}
61+
62+
/// Since this has passed the validation of [`MetadataBlob::new`], this returns bytes which are
63+
/// known to pass the [`MemDecoder`] validation.
64+
pub fn bytes(&self) -> &OwnedSlice {
65+
&self.0
66+
}
67+
}
68+
5769
/// A map from external crate numbers (as decoded from some crate file) to
5870
/// local crate numbers (as generated during this session). Each external
5971
/// crate may refer to types in other external crates, and each has their
@@ -165,7 +177,14 @@ pub(super) trait Metadata<'a, 'tcx>: Copy {
165177
fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> {
166178
let tcx = self.tcx();
167179
DecodeContext {
168-
opaque: MemDecoder::new(self.blob(), pos),
180+
// FIXME: This unwrap should never panic because we check that it won't when creating
181+
// `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of
182+
// it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own
183+
// the data for the decoder so holding onto the `MemDecoder` too would make us a
184+
// self-referential struct which is downright goofy because `MetadataBlob` is already
185+
// self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that
186+
// demands a significant refactoring due to our crate graph.
187+
opaque: MemDecoder::new(self.blob(), pos).unwrap(),
169188
cdata: self.cdata(),
170189
blob: self.blob(),
171190
sess: self.sess().or(tcx.map(|tcx| tcx.sess)),
@@ -393,7 +412,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
393412
where
394413
F: FnOnce(&mut Self) -> R,
395414
{
396-
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
415+
let new_opaque = self.opaque.split_at(pos);
397416
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
398417
let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode);
399418
let r = f(self);

compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMapRef<'static>
4848
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> {
4949
let len = d.read_usize();
5050
let pos = d.position();
51-
let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]);
51+
let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]);
5252

5353
// Although we already have the data we need via the `OwnedSlice`, we still need
5454
// to advance the `DecodeContext`'s position so it's in a valid state after

compiler/rustc_middle/src/query/on_disk_cache.rs

+22-20
Original file line numberDiff line numberDiff line change
@@ -154,24 +154,25 @@ impl EncodedSourceFileId {
154154

155155
impl<'sess> OnDiskCache<'sess> {
156156
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
157-
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
158-
debug_assert!(sess.opts.incremental.is_some());
159-
160-
// Wrap in a scope so we can borrow `data`.
161-
let footer: Footer = {
162-
let mut decoder = MemDecoder::new(&data, start_pos);
163-
164-
// Decode the *position* of the footer, which can be found in the
165-
// last 8 bytes of the file.
166-
let footer_pos = decoder
167-
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
168-
IntEncodedWithFixedSize::decode(decoder).0 as usize
169-
});
170-
// Decode the file footer, which contains all the lookup tables, etc.
171-
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER))
172-
};
157+
///
158+
/// The serialized cache has some basic integrity checks, if those checks indicate that the
159+
/// on-disk data is corrupt, an error is returned.
160+
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Result<Self, ()> {
161+
assert!(sess.opts.incremental.is_some());
162+
163+
let mut decoder = MemDecoder::new(&data, start_pos)?;
164+
165+
// Decode the *position* of the footer, which can be found in the
166+
// last 8 bytes of the file.
167+
let footer_pos = decoder
168+
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
169+
IntEncodedWithFixedSize::decode(decoder).0 as usize
170+
});
171+
// Decode the file footer, which contains all the lookup tables, etc.
172+
let footer: Footer =
173+
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER));
173174

174-
Self {
175+
Ok(Self {
175176
serialized_data: RwLock::new(Some(data)),
176177
file_index_to_stable_id: footer.file_index_to_stable_id,
177178
file_index_to_file: Default::default(),
@@ -184,7 +185,7 @@ impl<'sess> OnDiskCache<'sess> {
184185
expn_data: footer.expn_data,
185186
foreign_expn_data: footer.foreign_expn_data,
186187
hygiene_context: Default::default(),
187-
}
188+
})
188189
}
189190

190191
pub fn new_empty(source_map: &'sess SourceMap) -> Self {
@@ -437,7 +438,8 @@ impl<'sess> OnDiskCache<'sess> {
437438
let serialized_data = self.serialized_data.read();
438439
let mut decoder = CacheDecoder {
439440
tcx,
440-
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
441+
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize())
442+
.unwrap(),
441443
source_map: self.source_map,
442444
file_index_to_file: &self.file_index_to_file,
443445
file_index_to_stable_id: &self.file_index_to_stable_id,
@@ -558,7 +560,7 @@ impl<'a, 'tcx> TyDecoder for CacheDecoder<'a, 'tcx> {
558560
{
559561
debug_assert!(pos < self.opaque.len());
560562

561-
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
563+
let new_opaque = self.opaque.split_at(pos);
562564
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
563565
let r = f(self);
564566
self.opaque = old_opaque;

compiler/rustc_query_system/src/dep_graph/serialized.rs

+3-7
Original file line numberDiff line numberDiff line change
@@ -182,15 +182,13 @@ impl SerializedDepGraph {
182182
pub fn decode<D: Deps>(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
183183
// The last 16 bytes are the node count and edge count.
184184
debug!("position: {:?}", d.position());
185-
let (node_count, edge_count, graph_size) =
186-
d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
185+
let (node_count, edge_count) =
186+
d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
187187
debug!("position: {:?}", d.position());
188188
let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
189189
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
190-
let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize;
191-
(node_count, edge_count, graph_size)
190+
(node_count, edge_count)
192191
});
193-
assert_eq!(d.len(), graph_size);
194192
debug!("position: {:?}", d.position());
195193

196194
debug!(?node_count, ?edge_count);
@@ -606,8 +604,6 @@ impl<D: Deps> EncoderState<D> {
606604
debug!("position: {:?}", encoder.position());
607605
IntEncodedWithFixedSize(node_count).encode(&mut encoder);
608606
IntEncodedWithFixedSize(edge_count).encode(&mut encoder);
609-
let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE;
610-
IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder);
611607
debug!("position: {:?}", encoder.position());
612608
// Drop the encoder so that nothing is written after the counts.
613609
let result = encoder.finish();

compiler/rustc_serialize/src/opaque.rs

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ use crate::int_overflow::DebugStrictAdd;
1717

1818
pub type FileEncodeResult = Result<usize, (PathBuf, io::Error)>;
1919

20+
pub const MAGIC_END_BYTES: &[u8] = b"rust-end-file";
21+
2022
/// The size of the buffer in `FileEncoder`.
2123
const BUF_SIZE: usize = 8192;
2224

@@ -181,6 +183,7 @@ impl FileEncoder {
181183
}
182184

183185
pub fn finish(&mut self) -> FileEncodeResult {
186+
self.write_all(MAGIC_END_BYTES);
184187
self.flush();
185188
#[cfg(debug_assertions)]
186189
{
@@ -261,15 +264,18 @@ pub struct MemDecoder<'a> {
261264

262265
impl<'a> MemDecoder<'a> {
263266
#[inline]
264-
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
267+
pub fn new(data: &'a [u8], position: usize) -> Result<MemDecoder<'a>, ()> {
268+
let data = data.strip_suffix(MAGIC_END_BYTES).ok_or(())?;
265269
let Range { start, end } = data.as_ptr_range();
266-
MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }
270+
Ok(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData })
267271
}
268272

269273
#[inline]
270-
pub fn data(&self) -> &'a [u8] {
271-
// SAFETY: This recovers the original slice, only using members we never modify.
272-
unsafe { std::slice::from_raw_parts(self.start, self.len()) }
274+
pub fn split_at(&self, position: usize) -> MemDecoder<'a> {
275+
assert!(position <= self.len());
276+
// SAFETY: We checked above that this offset is within the original slice
277+
let current = unsafe { self.start.add(position) };
278+
MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData }
273279
}
274280

275281
#[inline]

compiler/rustc_serialize/tests/leb128.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use rustc_serialize::leb128::*;
2+
use rustc_serialize::opaque::MemDecoder;
3+
use rustc_serialize::opaque::MAGIC_END_BYTES;
24
use rustc_serialize::Decoder;
35

46
macro_rules! impl_test_unsigned_leb128 {
@@ -25,13 +27,15 @@ macro_rules! impl_test_unsigned_leb128 {
2527
let n = $write_fn_name(&mut buf, x);
2628
stream.extend(&buf[..n]);
2729
}
30+
let stream_end = stream.len();
31+
stream.extend(MAGIC_END_BYTES);
2832

29-
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
33+
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
3034
for &expected in &values {
3135
let actual = $read_fn_name(&mut decoder);
3236
assert_eq!(expected, actual);
3337
}
34-
assert_eq!(stream.len(), decoder.position());
38+
assert_eq!(stream_end, decoder.position());
3539
}
3640
};
3741
}
@@ -72,13 +76,15 @@ macro_rules! impl_test_signed_leb128 {
7276
let n = $write_fn_name(&mut buf, x);
7377
stream.extend(&buf[..n]);
7478
}
79+
let stream_end = stream.len();
80+
stream.extend(MAGIC_END_BYTES);
7581

76-
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
82+
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
7783
for &expected in &values {
7884
let actual = $read_fn_name(&mut decoder);
7985
assert_eq!(expected, actual);
8086
}
81-
assert_eq!(stream.len(), decoder.position());
87+
assert_eq!(stream_end, decoder.position());
8288
}
8389
};
8490
}

0 commit comments

Comments
 (0)