Skip to content

Commit 8d07c87

Browse files
bwoebipierotibou
andauthored
Ensure the msgpack contents include only valid utf8 (#2698)
* Ensure the msgpack contents include only valid utf8 Signed-off-by: Bob Weinand <[email protected]> * Update ext/serializer.c Co-authored-by: Pierre Bonet <[email protected]> * Avoid minor perf impact on msgpack serialization with sidecar sender disabled Signed-off-by: Bob Weinand <[email protected]> --------- Signed-off-by: Bob Weinand <[email protected]> Co-authored-by: Pierre Bonet <[email protected]>
1 parent c5f2dcb commit 8d07c87

3 files changed

Lines changed: 43 additions & 3 deletions

File tree

components-rs/ddtrace.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ ddog_CharSlice ddtrace_get_container_id(void);
139139

140140
void ddtrace_set_container_cgroup_path(ddog_CharSlice path);
141141

142+
char *ddtrace_strip_invalid_utf8(const char *input, uintptr_t *len);
143+
144+
void ddtrace_drop_rust_string(char *input, uintptr_t len);
145+
142146
bool ddog_shall_log(enum ddog_Log category);
143147

144148
void ddog_set_error_log_level(bool once);

components-rs/lib.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ pub mod log;
55
pub mod sidecar;
66
pub mod telemetry;
77

8+
use std::borrow::Cow;
9+
use std::ffi::c_char;
10+
use std::ptr::null_mut;
811
use ddcommon::entity_id::{get_container_id, set_cgroup_file};
912
use ddcommon_ffi::CharSlice;
1013
use uuid::Uuid;
@@ -42,3 +45,21 @@ pub extern "C" fn ddtrace_get_container_id() -> CharSlice<'static> {
4245
pub unsafe extern "C" fn ddtrace_set_container_cgroup_path(path: CharSlice) {
4346
set_cgroup_file(String::from(path.try_to_utf8().unwrap()))
4447
}
48+
49+
#[no_mangle]
50+
pub unsafe extern "C" fn ddtrace_strip_invalid_utf8(input: *const c_char, len: *mut usize) -> *mut c_char {
51+
match CharSlice::from_raw_parts(input, *len).to_utf8_lossy() {
52+
Cow::Borrowed(_) => null_mut(),
53+
Cow::Owned(s) => {
54+
*len = s.len();
55+
let ret = s.as_ptr() as *mut c_char;
56+
std::mem::forget(s);
57+
ret
58+
}
59+
}
60+
}
61+
62+
#[no_mangle]
63+
pub unsafe extern "C" fn ddtrace_drop_rust_string(input: *mut c_char, len: usize) {
64+
_ = String::from_raw_parts(input as *mut u8, len, len);
65+
}

ext/serializer.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,19 @@ extern void (*profiling_notify_trace_finished)(uint64_t local_root_span_id,
4646
zai_str span_type,
4747
zai_str resource);
4848

49+
static void mpack_write_utf8_lossy_cstr(mpack_writer_t *writer, const char *str, size_t len) {
50+
if (get_global_DD_TRACE_SIDECAR_TRACE_SENDER()) {
51+
char *strippedStr = ddtrace_strip_invalid_utf8(str, &len);
52+
if (strippedStr) {
53+
mpack_write_str(writer, strippedStr, len);
54+
ddtrace_drop_rust_string(strippedStr, len);
55+
return;
56+
}
57+
}
58+
59+
mpack_write_str(writer, str, len);
60+
}
61+
4962
#define MAX_ID_BUFSIZ 40 // 3.4e^38 = 39 chars + 1 terminator
5063
#define KEY_TRACE_ID "trace_id"
5164
#define KEY_SPAN_ID "span_id"
@@ -78,13 +91,15 @@ static int write_hash_table(mpack_writer_t *writer, HashTable *ht, int level) {
7891
bool zval_string_as_uint64 = false;
7992
if (is_assoc == 1) {
8093
char num_str_buf[MAX_ID_BUFSIZ], *key;
94+
size_t len;
8195
if (string_key) {
8296
key = ZSTR_VAL(string_key);
97+
len = ZSTR_LEN(string_key);
8398
} else {
8499
key = num_str_buf;
85-
sprintf(num_str_buf, ZEND_LONG_FMT, num_key);
100+
len = sprintf(num_str_buf, ZEND_LONG_FMT, num_key);
86101
}
87-
mpack_write_cstr(writer, key);
102+
mpack_write_utf8_lossy_cstr(writer, key, len);
88103
// If the key is trace_id, span_id or parent_id then strings have to be converted to uint64 when packed.
89104
if (level <= 3 &&
90105
(0 == strcmp(KEY_TRACE_ID, key) || 0 == strcmp(KEY_SPAN_ID, key) || 0 == strcmp(KEY_PARENT_ID, key))) {
@@ -134,7 +149,7 @@ static int msgpack_write_zval(mpack_writer_t *writer, zval *trace, int level) {
134149
mpack_write_bool(writer, Z_TYPE_P(trace) == IS_TRUE);
135150
break;
136151
case IS_STRING:
137-
mpack_write_cstr(writer, Z_STRVAL_P(trace));
152+
mpack_write_utf8_lossy_cstr(writer, Z_STRVAL_P(trace), Z_STRLEN_P(trace));
138153
break;
139154
default:
140155
LOG(WARN, "Serialize values must be of type array, string, int, float, bool or null");

0 commit comments

Comments
 (0)