Skip to content

Commit a1c3353

Browse files
feat(prof): configurable allocation sampling distance (#3227)
1 parent af1e5d8 commit a1c3353

7 files changed

Lines changed: 89 additions & 14 deletions

File tree

.github/workflows/prof_correctness.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ jobs:
9797
export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/"$test_case"/test.pprof
9898
php -d extension=$PWD/target/release/libdatadog_php_profiling.so profiling/tests/correctness/"$test_case".php
9999
done
100+
mkdir -p profiling/tests/correctness/allocations_1byte/
101+
export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocations_1byte/test.pprof
102+
export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1
103+
php -d extension=$PWD/target/release/libdatadog_php_profiling.so profiling/tests/correctness/allocations.php
104+
unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE
100105
101106
- name: Run ZTS tests
102107
if: matrix.phpts == 'zts'
@@ -120,6 +125,13 @@ jobs:
120125
expected_json: profiling/tests/correctness/allocations.json
121126
pprof_path: profiling/tests/correctness/allocations/
122127

128+
- name: Check profiler correctness for allocations with 1 byte sampling distance
129+
if: matrix.phpts != 'zts'
130+
uses: Datadog/prof-correctness/analyze@main
131+
with:
132+
expected_json: profiling/tests/correctness/allocations.json
133+
pprof_path: profiling/tests/correctness/allocations_1byte/
134+
123135
- name: Check profiler correctness for time
124136
uses: Datadog/prof-correctness/analyze@main
125137
with:

profiling/src/allocation/mod.rs

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@ use log::{error, trace};
66
use rand::rngs::ThreadRng;
77
use rand_distr::{Distribution, Poisson};
88
use std::cell::RefCell;
9-
use std::sync::atomic::AtomicU64;
9+
use std::sync::atomic::{AtomicU64, Ordering};
1010

1111
#[cfg(php_zend_mm_set_custom_handlers_ex)]
1212
pub mod allocation_ge84;
1313
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
1414
pub mod allocation_le83;
1515

16-
/// take a sample every 4096 KiB
17-
pub const ALLOCATION_PROFILING_INTERVAL: f64 = 1024.0 * 4096.0;
16+
/// Default sampling interval in bytes (4MB)
17+
pub const DEFAULT_ALLOCATION_SAMPLING_INTERVAL: u64 = 1024 * 4096;
18+
19+
/// Sampling distance feed into poison sampling algo
20+
pub static ALLOCATION_PROFILING_INTERVAL: AtomicU64 = AtomicU64::new(DEFAULT_ALLOCATION_SAMPLING_INTERVAL);
1821

1922
/// This will store the count of allocations (including reallocations) during
2023
/// a profiling period. This will overflow when doing more than u64::MAX
@@ -36,7 +39,8 @@ pub struct AllocationProfilingStats {
3639
impl AllocationProfilingStats {
3740
fn new() -> AllocationProfilingStats {
3841
// Safety: this will only error if lambda <= 0
39-
let poisson = Poisson::new(ALLOCATION_PROFILING_INTERVAL).unwrap();
42+
let poisson =
43+
Poisson::new(ALLOCATION_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64).unwrap();
4044
let mut stats = AllocationProfilingStats {
4145
next_sample: 0,
4246
poisson,
@@ -94,6 +98,32 @@ pub fn alloc_prof_startup() {
9498
allocation_le83::alloc_prof_startup();
9599
}
96100

101+
pub fn alloc_prof_first_rinit() {
102+
let allocation_profiling = REQUEST_LOCALS.with(|cell| {
103+
cell.try_borrow()
104+
.map(|locals| locals.system_settings().profiling_allocation_enabled)
105+
.unwrap_or(false)
106+
});
107+
108+
if !allocation_profiling {
109+
return;
110+
}
111+
112+
let sampling_distance = REQUEST_LOCALS.with(|cell| {
113+
match cell.try_borrow() {
114+
Ok(locals) => locals.system_settings().profiling_allocation_sampling_distance,
115+
Err(_err) => {
116+
error!("Allocation profiling was not initialized correctly due to a borrow error. Please report this to Datadog.");
117+
DEFAULT_ALLOCATION_SAMPLING_INTERVAL as u32
118+
}
119+
}
120+
});
121+
122+
ALLOCATION_PROFILING_INTERVAL.store(sampling_distance as u64, Ordering::SeqCst);
123+
124+
trace!("Memory allocation profiling initialized with a sampling distance of {} bytes.", ALLOCATION_PROFILING_INTERVAL.load(Ordering::SeqCst));
125+
}
126+
97127
pub fn alloc_prof_rinit() {
98128
let allocation_profiling: bool = REQUEST_LOCALS.with(|cell| {
99129
match cell.try_borrow() {
@@ -116,8 +146,6 @@ pub fn alloc_prof_rinit() {
116146
allocation_le83::alloc_prof_rinit();
117147
#[cfg(php_zend_mm_set_custom_handlers_ex)]
118148
allocation_ge84::alloc_prof_rinit();
119-
120-
trace!("Memory allocation profiling enabled.")
121149
}
122150

123151
pub fn alloc_prof_rshutdown() {

profiling/src/config.rs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pub struct SystemSettings {
2727
pub profiling_endpoint_collection_enabled: bool,
2828
pub profiling_experimental_cpu_time_enabled: bool,
2929
pub profiling_allocation_enabled: bool,
30+
pub profiling_allocation_sampling_distance: u32,
3031
pub profiling_timeline_enabled: bool,
3132
pub profiling_exception_enabled: bool,
3233
pub profiling_exception_message_enabled: bool,
@@ -69,6 +70,7 @@ impl SystemSettings {
6970
profiling_endpoint_collection_enabled: profiling_endpoint_collection_enabled(),
7071
profiling_experimental_cpu_time_enabled: profiling_experimental_cpu_time_enabled(),
7172
profiling_allocation_enabled: profiling_allocation_enabled(),
73+
profiling_allocation_sampling_distance: profiling_allocation_sampling_distance(),
7274
profiling_timeline_enabled: profiling_timeline_enabled(),
7375
profiling_exception_enabled: profiling_exception_enabled(),
7476
profiling_exception_message_enabled: profiling_exception_message_enabled(),
@@ -137,6 +139,7 @@ impl SystemSettings {
137139
profiling_endpoint_collection_enabled: false,
138140
profiling_experimental_cpu_time_enabled: false,
139141
profiling_allocation_enabled: false,
142+
profiling_allocation_sampling_distance: 0,
140143
profiling_timeline_enabled: false,
141144
profiling_exception_enabled: false,
142145
profiling_exception_message_enabled: false,
@@ -352,6 +355,7 @@ pub(crate) enum ConfigId {
352355
ProfilingEndpointCollectionEnabled,
353356
ProfilingExperimentalCpuTimeEnabled,
354357
ProfilingAllocationEnabled,
358+
ProfilingAllocationSamplingDistance,
355359
ProfilingTimelineEnabled,
356360
ProfilingExceptionEnabled,
357361
ProfilingExceptionMessageEnabled,
@@ -381,6 +385,7 @@ impl ConfigId {
381385
ProfilingEndpointCollectionEnabled => b"DD_PROFILING_ENDPOINT_COLLECTION_ENABLED\0",
382386
ProfilingExperimentalCpuTimeEnabled => b"DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED\0",
383387
ProfilingAllocationEnabled => b"DD_PROFILING_ALLOCATION_ENABLED\0",
388+
ProfilingAllocationSamplingDistance => b"DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE\0",
384389
ProfilingTimelineEnabled => b"DD_PROFILING_TIMELINE_ENABLED\0",
385390
ProfilingExceptionEnabled => b"DD_PROFILING_EXCEPTION_ENABLED\0",
386391
ProfilingExceptionMessageEnabled => b"DD_PROFILING_EXCEPTION_MESSAGE_ENABLED\0",
@@ -425,6 +430,7 @@ lazy_static::lazy_static! {
425430
profiling_endpoint_collection_enabled: false,
426431
profiling_experimental_cpu_time_enabled: false,
427432
profiling_allocation_enabled: false,
433+
profiling_allocation_sampling_distance: u32::MAX,
428434
profiling_timeline_enabled: false,
429435
profiling_exception_enabled: false,
430436
profiling_exception_message_enabled: false,
@@ -443,6 +449,7 @@ lazy_static::lazy_static! {
443449
profiling_endpoint_collection_enabled: true,
444450
profiling_experimental_cpu_time_enabled: true,
445451
profiling_allocation_enabled: true,
452+
profiling_allocation_sampling_distance: 1024 * 4096,
446453
profiling_timeline_enabled: true,
447454
profiling_exception_enabled: true,
448455
profiling_exception_message_enabled: false,
@@ -507,6 +514,17 @@ unsafe fn profiling_allocation_enabled() -> bool {
507514
)
508515
}
509516

517+
/// # Safety
518+
/// This function must only be called after config has been initialized in
519+
/// rinit, and before it is uninitialized in mshutdown.
520+
unsafe fn profiling_allocation_sampling_distance() -> u32 {
521+
get_system_uint32(
522+
ProfilingAllocationSamplingDistance,
523+
DEFAULT_SYSTEM_SETTINGS.profiling_allocation_sampling_distance,
524+
)
525+
}
526+
527+
510528
/// # Safety
511529
/// This function must only be called after config has been initialized in
512530
/// rinit, and before it is uninitialized in mshutdown.
@@ -696,8 +714,8 @@ unsafe fn profiling_log_level() -> LevelFilter {
696714
}
697715
}
698716

699-
/// Parses the exception sampling distance and makes sure it is ℤ+ (positive integer > 0)
700-
unsafe extern "C" fn parse_exception_sampling_distance_filter(
717+
/// Parses the sampling distance and makes sure it is ℤ+ (positive integer > 0)
718+
unsafe extern "C" fn parse_sampling_distance_filter(
701719
value: ZaiStr,
702720
decoded_value: *mut zval,
703721
_persistent: bool,
@@ -942,6 +960,18 @@ pub(crate) fn minit(module_number: libc::c_int) {
942960
displayer: None,
943961
env_config_fallback: None,
944962
},
963+
zai_config_entry {
964+
id: transmute::<ConfigId, u16>(ProfilingAllocationSamplingDistance),
965+
name: ProfilingAllocationSamplingDistance.env_var_name(),
966+
type_: ZAI_CONFIG_TYPE_CUSTOM,
967+
default_encoded_value: ZaiStr::literal(b"4194304\0"), // crate::allocation::DEFAULT_ALLOCATION_SAMPLING_INTERVAL
968+
aliases: ptr::null_mut(),
969+
aliases_count: 0,
970+
ini_change: Some(zai_config_system_ini_change),
971+
parser: Some(parse_sampling_distance_filter),
972+
displayer: None,
973+
env_config_fallback: None,
974+
},
945975
zai_config_entry {
946976
id: transmute::<ConfigId, u16>(ProfilingTimelineEnabled),
947977
name: ProfilingTimelineEnabled.env_var_name(),
@@ -986,7 +1016,7 @@ pub(crate) fn minit(module_number: libc::c_int) {
9861016
aliases: EXCEPTION_SAMPLING_DISTANCE_ALIASES.as_ptr(),
9871017
aliases_count: EXCEPTION_SAMPLING_DISTANCE_ALIASES.len() as u8,
9881018
ini_change: Some(zai_config_system_ini_change),
989-
parser: Some(parse_exception_sampling_distance_filter),
1019+
parser: Some(parse_sampling_distance_filter),
9901020
displayer: None,
9911021
env_config_fallback: None,
9921022
},

profiling/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,9 @@ extern "C" fn rinit(_type: c_int, _module_number: c_int) -> ZendResult {
606606

607607
#[cfg(all(feature = "io_profiling", target_os = "linux"))]
608608
io::io_prof_first_rinit();
609+
610+
#[cfg(feature = "allocation_profiling")]
611+
allocation::alloc_prof_first_rinit();
609612
});
610613

611614
Profiler::init(system_settings);

profiling/src/profiling/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ impl TimeCollector {
370370
let upscaling_info = UpscalingInfo::Poisson {
371371
sum_value_offset: alloc_size_offset,
372372
count_value_offset: alloc_samples_offset,
373-
sampling_distance: ALLOCATION_PROFILING_INTERVAL as u64,
373+
sampling_distance: ALLOCATION_PROFILING_INTERVAL.load(Ordering::SeqCst),
374374
};
375375
let values_offset = [alloc_size_offset, alloc_samples_offset];
376376
match profile.add_upscaling_rule(&values_offset, "", "", upscaling_info) {
@@ -1563,7 +1563,7 @@ pub struct JoinError {
15631563
#[cfg(test)]
15641564
mod tests {
15651565
use super::*;
1566-
use crate::config::AgentEndpoint;
1566+
use crate::{allocation::DEFAULT_ALLOCATION_SAMPLING_INTERVAL, config::AgentEndpoint};
15671567
use datadog_profiling::exporter::Uri;
15681568
use log::LevelFilter;
15691569

@@ -1582,6 +1582,7 @@ mod tests {
15821582
profiling_endpoint_collection_enabled: false,
15831583
profiling_experimental_cpu_time_enabled: false,
15841584
profiling_allocation_enabled: false,
1585+
profiling_allocation_sampling_distance: DEFAULT_ALLOCATION_SAMPLING_INTERVAL as u32,
15851586
profiling_timeline_enabled: false,
15861587
profiling_exception_enabled: false,
15871588
profiling_exception_message_enabled: false,

profiling/tests/phpt/allocation_bind_static_01.phpt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@ https://github.com/php/php-src/commit/ec54ffad1e3b15fedfd07f7d29d97ec3e8d1c45a
1313
But there's an `zend_array_dup` operation which can occur before this the call
1414
to `SAVE_OPLINE()`, so if the allocation profiler triggers there, it will
1515
access the invalid opline (non-null and invalid).
16-
17-
TODO: note that this probably will not fail, because we have to take a sample
18-
at exactly the right time, and currently we don't have a way to force this.
16+
--ENV--
17+
DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1
1918
--SKIPIF--
2019
<?php
2120
if (PHP_VERSION_ID < 70400)

profiling/tests/phpt/jit_03.phpt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ if (PHP_VERSION_ID < 80400)
1010
echo "skip: PHP Version < 8.4 are not affected", PHP_EOL;
1111
if (!extension_loaded('datadog-profiling'))
1212
echo "skip: test requires datadog-profiling", PHP_EOL;
13+
if (php_uname("s") === "Darwin")
14+
echo "skip: 'Darwin' has no JIT", PHP_EOL;
1315
?>
1416
--INI--
1517
datadog.profiling.enabled=yes

0 commit comments

Comments
 (0)