Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions tests/script-based-pre/tool-scanner/scanner-test.expected
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
5 test_scan_fn_loops.csv
19 test_scan_functions.csv
20 test_scan_functions.csv
5 test_scan_input_tys.csv
16 test_scan_overall.csv
17 test_scan_overall.csv
3 test_scan_recursion.csv
5 test_scan_unsafe_ops.csv
9 test_scan_unsafe_distance.csv
6 test_scan_unsafe_ops.csv

Unsafe Distance Results
call_external;0
next_id;0
external_function;0
raw_to_ref;0
with_for_loop;1
check_outer_coercion;1
with_iterator;2
current_id;0
generic;0
6 changes: 6 additions & 0 deletions tests/script-based-pre/tool-scanner/scanner-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,11 @@ pushd $OUT_DIR
cargo run -p scanner test.rs --crate-type lib
wc -l *csv

# How to intepret these results:
# - If the function is "truly safe," i.e., there's no unsafe in its call graph, it will not show up in the output at all.
# - Otherwise, the count should match the rules described in scanner::call_graph::OverallStats::unsafe_distance.
echo "Unsafe Distance Results"
cat test_scan_unsafe_distance.csv

popd
rm -rf ${OUT_DIR}
9 changes: 9 additions & 0 deletions tests/script-based-pre/tool-scanner/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,12 @@ pub fn start_recursion() {
pub fn not_recursive() {
let _ = ok();
}

extern "C" {
fn external_function();
}

/// Ensure scanner finds unsafe calls to external functions.
pub fn call_external() {
unsafe { external_function() };
}
83 changes: 67 additions & 16 deletions tools/scanner/src/analysis.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Provide different static analysis to be performed in the crate under compilation
//! Provide passes that perform intra-function analysis on the crate under compilation

use crate::info;
use csv::WriterBuilder;
Expand All @@ -11,9 +11,10 @@ use serde::{Serialize, Serializer, ser::SerializeStruct};
use stable_mir::mir::mono::Instance;
use stable_mir::mir::visit::{Location, PlaceContext, PlaceRef};
use stable_mir::mir::{
BasicBlock, Body, MirVisitor, Mutability, ProjectionElem, Safety, Terminator, TerminatorKind,
BasicBlock, Body, CastKind, MirVisitor, Mutability, NonDivergingIntrinsic, ProjectionElem,
Rvalue, Safety, Statement, StatementKind, Terminator, TerminatorKind,
};
use stable_mir::ty::{AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::ty::{Abi, AdtDef, AdtKind, FnDef, GenericArgs, MirConst, RigidTy, Ty, TyKind};
use stable_mir::visitor::{Visitable, Visitor};
use stable_mir::{CrateDef, CrateItem};
use std::collections::{HashMap, HashSet};
Expand All @@ -23,7 +24,7 @@ use std::path::{Path, PathBuf};
#[derive(Clone, Debug)]
pub struct OverallStats {
/// The key and value of each counter.
counters: Vec<(&'static str, usize)>,
pub counters: Vec<(&'static str, usize)>,
/// TODO: Group stats per function.
fn_stats: HashMap<CrateItem, FnStats>,
}
Expand All @@ -49,6 +50,12 @@ impl FnStats {
}
}

impl Default for OverallStats {
fn default() -> Self {
Self::new()
}
}

impl OverallStats {
pub fn new() -> OverallStats {
let all_items = stable_mir::all_local_items();
Expand Down Expand Up @@ -232,24 +239,24 @@ impl OverallStats {

macro_rules! fn_props {
($(#[$attr:meta])*
struct $name:ident {
$vis:vis struct $name:ident {
$(
$(#[$prop_attr:meta])*
$prop:ident,
)+
}) => {
#[derive(Debug)]
struct $name {
$vis struct $name {
fn_name: String,
$($(#[$prop_attr])* $prop: usize,)+
}

impl $name {
const fn num_props() -> usize {
$vis const fn num_props() -> usize {
[$(stringify!($prop),)+].len()
}

fn new(fn_name: String) -> Self {
$vis fn new(fn_name: String) -> Self {
Self { fn_name, $($prop: 0,)+}
}
}
Expand Down Expand Up @@ -369,7 +376,7 @@ impl Visitor for TypeVisitor<'_> {
}
}

fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
pub(crate) fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
out_path.set_extension("csv");
info(format!("Write file: {out_path:?}"));
let mut writer = WriterBuilder::new().delimiter(b';').from_path(&out_path).unwrap();
Expand All @@ -379,17 +386,23 @@ fn dump_csv<T: Serialize>(mut out_path: PathBuf, data: &[T]) {
}

fn_props! {
struct FnUnsafeOperations {
pub struct FnUnsafeOperations {
inline_assembly,
/// Dereference a raw pointer.
/// This is also counted when we access a static variable since it gets translated to a raw pointer.
unsafe_dereference,
/// Call an unsafe function or method.
/// Call an unsafe function or method including C-FFI.
unsafe_call,
/// Access or modify a mutable static variable.
unsafe_static_access,
/// Access fields of unions.
unsafe_union_access,
/// Invoke external functions (this is a subset of `unsafe_call`.
extern_call,
/// Transmute operations.
transmute,
/// Cast raw pointer to reference.
unsafe_cast,
}
}

Expand Down Expand Up @@ -419,9 +432,19 @@ impl MirVisitor for BodyVisitor<'_> {
fn visit_terminator(&mut self, term: &Terminator, location: Location) {
match &term.kind {
TerminatorKind::Call { func, .. } => {
let fn_sig = func.ty(self.body.locals()).unwrap().kind().fn_sig().unwrap();
if fn_sig.value.safety == Safety::Unsafe {
let TyKind::RigidTy(RigidTy::FnDef(fn_def, _)) =
func.ty(self.body.locals()).unwrap().kind()
else {
return self.super_terminator(term, location);
};
let fn_sig = fn_def.fn_sig().skip_binder();
if fn_sig.safety == Safety::Unsafe {
self.props.unsafe_call += 1;
if !matches!(fn_sig.abi, Abi::Rust | Abi::RustCold | Abi::RustCall)
&& !fn_def.has_body()
{
self.props.extern_call += 1;
}
}
}
TerminatorKind::InlineAsm { .. } => self.props.inline_assembly += 1,
Expand All @@ -430,6 +453,34 @@ impl MirVisitor for BodyVisitor<'_> {
self.super_terminator(term, location)
}

fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) {
if let Rvalue::Cast(cast_kind, operand, ty) = rvalue {
match cast_kind {
CastKind::Transmute => {
self.props.transmute += 1;
}
_ => {
let operand_ty = operand.ty(self.body.locals()).unwrap();
if ty.kind().is_ref() && operand_ty.kind().is_raw_ptr() {
self.props.unsafe_cast += 1;
}
}
}
};
self.super_rvalue(rvalue, location);
}

fn visit_statement(&mut self, stmt: &Statement, location: Location) {
if matches!(
&stmt.kind,
StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(_))
) {
// Treat this as invoking the copy intrinsic.
self.props.unsafe_call += 1;
}
self.super_statement(stmt, location)
}

fn visit_projection_elem(
&mut self,
place: PlaceRef,
Expand Down Expand Up @@ -674,9 +725,9 @@ impl Recursion {
}
}

struct FnCallVisitor<'a> {
body: &'a Body,
fns: Vec<FnDef>,
pub struct FnCallVisitor<'a> {
pub body: &'a Body,
pub fns: Vec<FnDef>,
}

impl MirVisitor for FnCallVisitor<'_> {
Expand Down
110 changes: 110 additions & 0 deletions tools/scanner/src/call_graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Provide passes that perform inter-function analysis on the crate under compilation.
//!
//! This module also includes a `CallGraph` structure to help the analysis.
//! For now, we build the CallGraph as part of the pass, but as we add more analysis,
//! the call-graph could be reused by different analysis.

use crate::analysis::{FnCallVisitor, FnUnsafeOperations, OverallStats};
use stable_mir::mir::{MirVisitor, Safety};
use stable_mir::ty::{FnDef, RigidTy, Ty, TyKind};
use stable_mir::{CrateDef, CrateDefType};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::hash::{Hash, Hasher};
use std::path::PathBuf;

impl OverallStats {
/// Iterate over all functions defined in this crate and log any unsafe operation.
/// Distance indicates how many degrees of separation the function has to unsafe code, if any?
/// - `None` if this function is indeed safe.
/// - 0 if this function contains unsafe code (including invoking unsafe fns).
/// - 1 if this function calls a safe abstraction.
/// - 2+ if this function calls other functions that call safe abstractions.
pub fn unsafe_distance(&mut self, filename: PathBuf) {
let all_items = stable_mir::all_local_items();
let mut queue =
all_items.into_iter().filter_map(|item| Node::try_new(item.ty())).collect::<Vec<_>>();
// Build call graph
let mut call_graph = CallGraph::default();
while let Some(node) = queue.pop() {
if let Entry::Vacant(e) = call_graph.nodes.entry(node.def) {
e.insert(node);
let Some(body) = node.def.body() else {
continue;
};
let mut visitor = FnCallVisitor { body: &body, fns: vec![] };
visitor.visit_body(&body);
queue.extend(visitor.fns.iter().map(|def| Node::try_new(def.ty()).unwrap()));
for callee in &visitor.fns {
call_graph.rev_edges.entry(*callee).or_default().push(node.def)
}
call_graph.edges.insert(node.def, visitor.fns);
}
}

// Calculate the distance between unsafe functions and functions with unsafe operation.
let mut queue = call_graph
.nodes
.values()
.filter_map(|node| node.has_unsafe.then_some((node.def, 0)))
.collect::<VecDeque<_>>();
let mut visited: HashMap<FnDef, u16> = HashMap::from_iter(queue.iter().cloned());
while let Some(current) = queue.pop_front() {
for caller in call_graph.rev_edges.entry(current.0).or_default() {
if !visited.contains_key(caller) {
let distance = current.1 + 1;
visited.insert(*caller, distance);
queue.push_back((*caller, distance))
}
}
}
let krate = stable_mir::local_crate();
let transitive_unsafe = visited
.into_iter()
.filter_map(|(def, distance)| (def.krate() == krate).then_some((def.name(), distance)))
.collect::<Vec<_>>();
self.counters.push(("transitive_unsafe", transitive_unsafe.len()));
crate::analysis::dump_csv(filename, &transitive_unsafe);
}
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct Node {
def: FnDef,
is_unsafe: bool,
has_unsafe: bool,
}

impl Node {
fn try_new(ty: Ty) -> Option<Node> {
let kind = ty.kind();
let TyKind::RigidTy(RigidTy::FnDef(def, _)) = kind else {
return None;
};
let has_unsafe = if let Some(body) = def.body() {
let unsafe_ops = FnUnsafeOperations::new(def.name()).collect(&body);
unsafe_ops.has_unsafe()
} else {
true
};
let fn_sig = kind.fn_sig().unwrap();
let is_unsafe = fn_sig.skip_binder().safety == Safety::Unsafe;
Some(Node { def, is_unsafe, has_unsafe })
}
}

impl Hash for Node {
fn hash<H: Hasher>(&self, state: &mut H) {
self.def.hash(state)
}
}

#[derive(Default, Debug)]
struct CallGraph {
nodes: HashMap<FnDef, Node>,
edges: HashMap<FnDef, Vec<FnDef>>,
rev_edges: HashMap<FnDef, Vec<FnDef>>,
}
9 changes: 8 additions & 1 deletion tools/scanner/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

#![feature(rustc_private)]

mod analysis;
pub mod analysis;
pub mod call_graph;

extern crate rustc_driver;
extern crate rustc_interface;
Expand Down Expand Up @@ -65,6 +66,8 @@ pub enum Analysis {
FnLoops,
/// Collect information about recursion via direct calls.
Recursion,
/// Collect information about transitive usage of unsafe.
UnsafeDistance,
}

fn info(msg: String) {
Expand All @@ -75,6 +78,9 @@ fn info(msg: String) {

/// This function invoke the required analyses in the given order.
fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
if stable_mir::local_crate().name == "build_script_build" {
return ControlFlow::Continue(());
}
let object_file = tcx.output_filenames(()).path(OutputType::Object);
let base_path = object_file.as_path().to_path_buf();
// Use name for now to make it more friendly. Change to base_path.file_stem() to avoid conflict.
Expand All @@ -96,6 +102,7 @@ fn analyze_crate(tcx: TyCtxt, analyses: &[Analysis]) -> ControlFlow<()> {
Analysis::UnsafeOps => crate_stats.unsafe_operations(out_path),
Analysis::FnLoops => crate_stats.loops(out_path),
Analysis::Recursion => crate_stats.recursion(out_path),
Analysis::UnsafeDistance => crate_stats.unsafe_distance(out_path),
}
}
crate_stats.store_csv(base_path, &file_stem);
Expand Down
Loading