Skip to content

Commit 11392c9

Browse files
committed
Auto merge of #117329 - RalfJung:offset-by-zero, r=oli-obk,scottmcm
offset: allow zero-byte offset on arbitrary pointers As per prior `@rust-lang/opsem` [discussion](rust-lang/opsem-team#10) and [FCP](rust-lang/unsafe-code-guidelines#472 (comment)): - Zero-sized reads and writes are allowed on all sufficiently aligned pointers, including the null pointer - Inbounds-offset-by-zero is allowed on all pointers, including the null pointer - `offset_from` on two pointers derived from the same allocation is always allowed when they have the same address This removes surprising UB (in particular, even C++ allows "nullptr + 0", which we currently disallow), and it brings us one step closer to an important theoretical property for our semantics ("provenance monotonicity": if operations are valid on bytes without provenance, then adding provenance can't make them invalid). The minimum LLVM we require (v17) includes https://reviews.llvm.org/D154051, so we can finally implement this. The `offset_from` change is needed to maintain the equivalence with `offset`: if `let ptr2 = ptr1.offset(N)` is well-defined, then `ptr2.offset_from(ptr1)` should be well-defined and return N. Now consider the case where N is 0 and `ptr1` dangles: we want to still allow offset_from here. I think we should change offset_from further, but that's a separate discussion. Fixes #65108 [Tracking issue](#117945) | [T-lang summary](#117329 (comment)) Cc `@nikic`
2 parents bec1029 + 9526ce6 commit 11392c9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+202
-476
lines changed

compiler/rustc_const_eval/src/const_eval/machine.rs

+15-5
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ use rustc_target::spec::abi::Abi as CallAbi;
2525
use crate::errors::{LongRunning, LongRunningWarn};
2626
use crate::fluent_generated as fluent;
2727
use crate::interpret::{
28-
self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom,
28+
self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup,
2929
throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, FnVal, Frame,
30-
ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar,
30+
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar,
3131
};
3232

3333
use super::error::*;
@@ -759,11 +759,21 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir,
759759
ecx: &InterpCx<'mir, 'tcx, Self>,
760760
alloc_id: AllocId,
761761
) -> InterpResult<'tcx> {
762+
// Check if this is the currently evaluated static.
762763
if Some(alloc_id) == ecx.machine.static_root_ids.map(|(id, _)| id) {
763-
Err(ConstEvalErrKind::RecursiveStatic.into())
764-
} else {
765-
Ok(())
764+
return Err(ConstEvalErrKind::RecursiveStatic.into());
766765
}
766+
// If this is another static, make sure we fire off the query to detect cycles.
767+
// But only do that when checks for static recursion are enabled.
768+
if ecx.machine.static_root_ids.is_some() {
769+
if let Some(GlobalAlloc::Static(def_id)) = ecx.tcx.try_get_global_alloc(alloc_id) {
770+
if ecx.tcx.is_foreign_item(def_id) {
771+
throw_unsup!(ExternStatic(def_id));
772+
}
773+
ecx.ctfe_query(|tcx| tcx.eval_static_initializer(def_id))?;
774+
}
775+
}
776+
Ok(())
767777
}
768778
}
769779

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+1
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
255255
name = intrinsic_name,
256256
);
257257
}
258+
// This will always return 0.
258259
(a, b)
259260
}
260261
(Err(_), _) | (_, Err(_)) => {

compiler/rustc_const_eval/src/interpret/memory.rs

+19-20
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
413413
/// to the allocation it points to. Supports both shared and mutable references, as the actual
414414
/// checking is offloaded to a helper closure.
415415
///
416+
/// `alloc_size` will only get called for non-zero-sized accesses.
417+
///
416418
/// Returns `None` if and only if the size is 0.
417419
fn check_and_deref_ptr<T>(
418420
&self,
@@ -425,18 +427,19 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
425427
M::ProvenanceExtra,
426428
) -> InterpResult<'tcx, (Size, Align, T)>,
427429
) -> InterpResult<'tcx, Option<T>> {
430+
// Everything is okay with size 0.
431+
if size.bytes() == 0 {
432+
return Ok(None);
433+
}
434+
428435
Ok(match self.ptr_try_get_alloc_id(ptr) {
429436
Err(addr) => {
430-
// We couldn't get a proper allocation. This is only okay if the access size is 0,
431-
// and the address is not null.
432-
if size.bytes() > 0 || addr == 0 {
433-
throw_ub!(DanglingIntPointer(addr, msg));
434-
}
435-
None
437+
// We couldn't get a proper allocation.
438+
throw_ub!(DanglingIntPointer(addr, msg));
436439
}
437440
Ok((alloc_id, offset, prov)) => {
438441
let (alloc_size, _alloc_align, ret_val) = alloc_size(alloc_id, offset, prov)?;
439-
// Test bounds. This also ensures non-null.
442+
// Test bounds.
440443
// It is sufficient to check this for the end pointer. Also check for overflow!
441444
if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
442445
throw_ub!(PointerOutOfBounds {
@@ -447,14 +450,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
447450
msg,
448451
})
449452
}
450-
// Ensure we never consider the null pointer dereferenceable.
451-
if M::Provenance::OFFSET_IS_ADDR {
452-
assert_ne!(ptr.addr(), Size::ZERO);
453-
}
454453

455-
// We can still be zero-sized in this branch, in which case we have to
456-
// return `None`.
457-
if size.bytes() == 0 { None } else { Some(ret_val) }
454+
Some(ret_val)
458455
}
459456
})
460457
}
@@ -641,16 +638,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
641638
size,
642639
CheckInAllocMsg::MemoryAccessTest,
643640
|alloc_id, offset, prov| {
644-
if !self.memory.validation_in_progress.get() {
645-
// We want to call the hook on *all* accesses that involve an AllocId,
646-
// including zero-sized accesses. That means we have to do it here
647-
// rather than below in the `Some` branch.
648-
M::before_alloc_read(self, alloc_id)?;
649-
}
650641
let alloc = self.get_alloc_raw(alloc_id)?;
651642
Ok((alloc.size(), alloc.align, (alloc_id, offset, prov, alloc)))
652643
},
653644
)?;
645+
// We want to call the hook on *all* accesses that involve an AllocId, including zero-sized
646+
// accesses. That means we cannot rely on the closure above or the `Some` branch below. We
647+
// do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked.
648+
if !self.memory.validation_in_progress.get() {
649+
if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr) {
650+
M::before_alloc_read(self, alloc_id)?;
651+
}
652+
}
654653

655654
if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc {
656655
let range = alloc_range(offset, size);

compiler/rustc_const_eval/src/interpret/validity.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,11 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
434434
found_bytes: has.bytes()
435435
},
436436
);
437+
// Make sure this is non-null. We checked dereferenceability above, but if `size` is zero
438+
// that does not imply non-null.
439+
if self.ecx.scalar_may_be_null(Scalar::from_maybe_pointer(place.ptr(), self.ecx))? {
440+
throw_validation_failure!(self.path, NullPtr { ptr_kind })
441+
}
437442
// Do not allow pointers to uninhabited types.
438443
if place.layout.abi.is_uninhabited() {
439444
let ty = place.layout.ty;
@@ -456,8 +461,8 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
456461
// `!` is a ZST and we want to validate it.
457462
if let Ok((alloc_id, _offset, _prov)) = self.ecx.ptr_try_get_alloc_id(place.ptr()) {
458463
let mut skip_recursive_check = false;
459-
let alloc_actual_mutbl = mutability(self.ecx, alloc_id);
460-
if let GlobalAlloc::Static(did) = self.ecx.tcx.global_alloc(alloc_id) {
464+
if let Some(GlobalAlloc::Static(did)) = self.ecx.tcx.try_get_global_alloc(alloc_id)
465+
{
461466
let DefKind::Static { nested, .. } = self.ecx.tcx.def_kind(did) else { bug!() };
462467
// Special handling for pointers to statics (irrespective of their type).
463468
assert!(!self.ecx.tcx.is_thread_local_static(did));
@@ -495,6 +500,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
495500
// If this allocation has size zero, there is no actual mutability here.
496501
let (size, _align, _alloc_kind) = self.ecx.get_alloc_info(alloc_id);
497502
if size != Size::ZERO {
503+
let alloc_actual_mutbl = mutability(self.ecx, alloc_id);
498504
// Mutable pointer to immutable memory is no good.
499505
if ptr_expected_mutbl == Mutability::Mut
500506
&& alloc_actual_mutbl == Mutability::Not
@@ -831,6 +837,8 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
831837
trace!("visit_value: {:?}, {:?}", *op, op.layout);
832838

833839
// Check primitive types -- the leaves of our recursive descent.
840+
// We assume that the Scalar validity range does not restrict these values
841+
// any further than `try_visit_primitive` does!
834842
if self.try_visit_primitive(op)? {
835843
return Ok(());
836844
}

library/core/src/intrinsics.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1483,10 +1483,10 @@ extern "rust-intrinsic" {
14831483
///
14841484
/// # Safety
14851485
///
1486-
/// Both the starting and resulting pointer must be either in bounds or one
1487-
/// byte past the end of an allocated object. If either pointer is out of
1488-
/// bounds or arithmetic overflow occurs then any further use of the
1489-
/// returned value will result in undefined behavior.
1486+
/// If the computed offset is non-zero, then both the starting and resulting pointer must be
1487+
/// either in bounds or at the end of an allocated object. If either pointer is out
1488+
/// of bounds or arithmetic overflow occurs then any further use of the returned value will
1489+
/// result in undefined behavior.
14901490
///
14911491
/// The stabilized version of this intrinsic is [`pointer::offset`].
14921492
#[must_use = "returns a new pointer rather than modifying its argument"]
@@ -1502,7 +1502,7 @@ extern "rust-intrinsic" {
15021502
/// # Safety
15031503
///
15041504
/// Unlike the `offset` intrinsic, this intrinsic does not restrict the
1505-
/// resulting pointer to point into or one byte past the end of an allocated
1505+
/// resulting pointer to point into or at the end of an allocated
15061506
/// object, and it wraps with two's complement arithmetic. The resulting
15071507
/// value is not necessarily valid to be used to actually access memory.
15081508
///

library/core/src/ptr/const_ptr.rs

+13-10
Original file line numberDiff line numberDiff line change
@@ -465,8 +465,9 @@ impl<T: ?Sized> *const T {
465465
/// If any of the following conditions are violated, the result is Undefined
466466
/// Behavior:
467467
///
468-
/// * Both the starting and resulting pointer must be either in bounds or one
469-
/// byte past the end of the same [allocated object].
468+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
469+
/// pointer must be either in bounds or at the end of the same [allocated object].
470+
/// (If it is zero, then the function is always well-defined.)
470471
///
471472
/// * The computed offset, **in bytes**, cannot overflow an `isize`.
472473
///
@@ -676,11 +677,11 @@ impl<T: ?Sized> *const T {
676677
/// If any of the following conditions are violated, the result is Undefined
677678
/// Behavior:
678679
///
679-
/// * Both `self` and `origin` must be either in bounds or one
680-
/// byte past the end of the same [allocated object].
680+
/// * `self` and `origin` must either
681681
///
682-
/// * Both pointers must be *derived from* a pointer to the same object.
683-
/// (See below for an example.)
682+
/// * both be *derived from* a pointer to the same [allocated object], and the memory range between
683+
/// the two pointers must be either empty or in bounds of that object. (See below for an example.)
684+
/// * or both be derived from an integer literal/constant, and point to the same address.
684685
///
685686
/// * The distance between the pointers, in bytes, must be an exact multiple
686687
/// of the size of `T`.
@@ -951,8 +952,9 @@ impl<T: ?Sized> *const T {
951952
/// If any of the following conditions are violated, the result is Undefined
952953
/// Behavior:
953954
///
954-
/// * Both the starting and resulting pointer must be either in bounds or one
955-
/// byte past the end of the same [allocated object].
955+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
956+
/// pointer must be either in bounds or at the end of the same [allocated object].
957+
/// (If it is zero, then the function is always well-defined.)
956958
///
957959
/// * The computed offset, **in bytes**, cannot overflow an `isize`.
958960
///
@@ -1035,8 +1037,9 @@ impl<T: ?Sized> *const T {
10351037
/// If any of the following conditions are violated, the result is Undefined
10361038
/// Behavior:
10371039
///
1038-
/// * Both the starting and resulting pointer must be either in bounds or one
1039-
/// byte past the end of the same [allocated object].
1040+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
1041+
/// pointer must be either in bounds or at the end of the same [allocated object].
1042+
/// (If it is zero, then the function is always well-defined.)
10401043
///
10411044
/// * The computed offset cannot exceed `isize::MAX` **bytes**.
10421045
///

library/core/src/ptr/mod.rs

+3-8
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,13 @@
1515
//! The precise rules for validity are not determined yet. The guarantees that are
1616
//! provided at this point are very minimal:
1717
//!
18-
//! * A [null] pointer is *never* valid, not even for accesses of [size zero][zst].
18+
//! * For operations of [size zero][zst], *every* pointer is valid, including the [null] pointer.
19+
//! The following points are only concerned with non-zero-sized accesses.
20+
//! * A [null] pointer is *never* valid.
1921
//! * For a pointer to be valid, it is necessary, but not always sufficient, that the pointer
2022
//! be *dereferenceable*: the memory range of the given size starting at the pointer must all be
2123
//! within the bounds of a single allocated object. Note that in Rust,
2224
//! every (stack-allocated) variable is considered a separate allocated object.
23-
//! * Even for operations of [size zero][zst], the pointer must not be pointing to deallocated
24-
//! memory, i.e., deallocation makes pointers invalid even for zero-sized operations. However,
25-
//! casting any non-zero integer *literal* to a pointer is valid for zero-sized accesses, even if
26-
//! some memory happens to exist at that address and gets deallocated. This corresponds to writing
27-
//! your own allocator: allocating zero-sized objects is not very hard. The canonical way to
28-
//! obtain a pointer that is valid for zero-sized accesses is [`NonNull::dangling`].
29-
//FIXME: mention `ptr::dangling` above, once it is stable.
3025
//! * All accesses performed by functions in this module are *non-atomic* in the sense
3126
//! of [atomic operations] used to synchronize between threads. This means it is
3227
//! undefined behavior to perform two concurrent accesses to the same location from different

library/core/src/ptr/mut_ptr.rs

+13-10
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,9 @@ impl<T: ?Sized> *mut T {
480480
/// If any of the following conditions are violated, the result is Undefined
481481
/// Behavior:
482482
///
483-
/// * Both the starting and resulting pointer must be either in bounds or one
484-
/// byte past the end of the same [allocated object].
483+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
484+
/// pointer must be either in bounds or at the end of the same [allocated object].
485+
/// (If it is zero, then the function is always well-defined.)
485486
///
486487
/// * The computed offset, **in bytes**, cannot overflow an `isize`.
487488
///
@@ -904,11 +905,11 @@ impl<T: ?Sized> *mut T {
904905
/// If any of the following conditions are violated, the result is Undefined
905906
/// Behavior:
906907
///
907-
/// * Both `self` and `origin` must be either in bounds or one
908-
/// byte past the end of the same [allocated object].
908+
/// * `self` and `origin` must either
909909
///
910-
/// * Both pointers must be *derived from* a pointer to the same object.
911-
/// (See below for an example.)
910+
/// * both be *derived from* a pointer to the same [allocated object], and the memory range between
911+
/// the two pointers must be either empty or in bounds of that object. (See below for an example.)
912+
/// * or both be derived from an integer literal/constant, and point to the same address.
912913
///
913914
/// * The distance between the pointers, in bytes, must be an exact multiple
914915
/// of the size of `T`.
@@ -1095,8 +1096,9 @@ impl<T: ?Sized> *mut T {
10951096
/// If any of the following conditions are violated, the result is Undefined
10961097
/// Behavior:
10971098
///
1098-
/// * Both the starting and resulting pointer must be either in bounds or one
1099-
/// byte past the end of the same [allocated object].
1099+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
1100+
/// pointer must be either in bounds or at the end of the same [allocated object].
1101+
/// (If it is zero, then the function is always well-defined.)
11001102
///
11011103
/// * The computed offset, **in bytes**, cannot overflow an `isize`.
11021104
///
@@ -1179,8 +1181,9 @@ impl<T: ?Sized> *mut T {
11791181
/// If any of the following conditions are violated, the result is Undefined
11801182
/// Behavior:
11811183
///
1182-
/// * Both the starting and resulting pointer must be either in bounds or one
1183-
/// byte past the end of the same [allocated object].
1184+
/// * If the computed offset, **in bytes**, is non-zero, then both the starting and resulting
1185+
/// pointer must be either in bounds or at the end of the same [allocated object].
1186+
/// (If it is zero, then the function is always well-defined.)
11841187
///
11851188
/// * The computed offset cannot exceed `isize::MAX` **bytes**.
11861189
///

src/tools/miri/tests/fail/dangling_pointers/dangling_zst_deref.rs

-10
This file was deleted.

src/tools/miri/tests/fail/dangling_pointers/dangling_zst_deref.stderr

-25
This file was deleted.

src/tools/miri/tests/fail/dangling_pointers/maybe_null_pointer_deref_zst.rs

-5
This file was deleted.

src/tools/miri/tests/fail/dangling_pointers/maybe_null_pointer_deref_zst.stderr

-15
This file was deleted.

src/tools/miri/tests/fail/dangling_pointers/maybe_null_pointer_write_zst.rs

-8
This file was deleted.

0 commit comments

Comments
 (0)