Skip to content

Commit d2fbb08

Browse files
committed
Fix bug where the cached_analysis_request was not properly invalidating the v8::UnboundScript cache for the passed in JSRuntime.
1 parent 9bd8970 commit d2fbb08

2 files changed

Lines changed: 102 additions & 83 deletions

File tree

  • crates

crates/bins/src/bin/datadog_static_analyzer_server/rule_cache.rs

Lines changed: 102 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@ pub struct CompiledServerRule {
1616
}
1717

1818
impl CompiledServerRule {
19-
/// Returns a reference to the [`ServerRule`] that this cache represents.
20-
pub fn inner(&self) -> &ServerRule {
21-
&self.rule
22-
}
23-
2419
/// Returns a reference to the [`RuleInternal`] that this cache represents.
2520
pub fn as_internal(&self) -> &RuleInternal {
2621
&self.internal
@@ -32,23 +27,17 @@ impl CompiledServerRule {
3227
#[derive(Debug)]
3328
pub struct RuleCache(DashMap<String, Arc<CompiledServerRule>>);
3429

35-
/// A return value from looking up or inserting a value into a [`RuleCache`]. `is_update` will be `true`
36-
/// if this value previously existed in the map and was updated, or `false` if either a
37-
/// previous value was returned or the rule name has been cached for the first time.
38-
#[derive(Debug)]
39-
pub(crate) struct CachedValue {
40-
pub(crate) rule: Arc<CompiledServerRule>,
41-
pub(crate) is_update: bool,
42-
}
43-
4430
impl RuleCache {
4531
/// Returns a new `RuleCache` with a capacity of 0.
4632
pub fn new() -> Self {
4733
Self(DashMap::new())
4834
}
4935

50-
/// Returns a [`CachedValue`] for the provided [`ServerRule`], utilizing a cache under the hood.
51-
pub fn get_or_insert_from(&self, server_rule: ServerRule) -> Result<CachedValue, &'static str> {
36+
/// Returns the [`CompiledServerRule`] for the provided [`ServerRule`], utilizing a cache under the hood.
37+
pub fn get_or_insert_from(
38+
&self,
39+
server_rule: ServerRule,
40+
) -> Result<Arc<CompiledServerRule>, &'static str> {
5241
if let Some(cached) = self.0.get(&server_rule.name) {
5342
// (Note that even if the cache has this rule re-written after this function returns, the caller
5443
// will still hold an owned reference to the original compiled rule, so there is no race).
@@ -57,10 +46,7 @@ impl RuleCache {
5746
// This check is necessary because the HashMap is keyed by rule name, so there
5847
// could be a collision if the rule is updated.
5948
if cached.rule == server_rule {
60-
return Ok(CachedValue {
61-
rule: Arc::clone(cached.value()),
62-
is_update: false,
63-
});
49+
return Ok(Arc::clone(cached.value()));
6450
}
6551
}
6652
// If there was no cached rule (or the rule wasn't an exact match), compile and cache a new rule.
@@ -71,11 +57,8 @@ impl RuleCache {
7157
internal: rule_internal,
7258
});
7359
let cloned_rule = Arc::clone(&compiled);
74-
let existing = self.0.insert(rule_name, compiled);
75-
Ok(CachedValue {
76-
rule: cloned_rule,
77-
is_update: existing.is_some(),
78-
})
60+
self.0.insert(rule_name, compiled);
61+
Ok(cloned_rule)
7962
}
8063
}
8164

@@ -96,11 +79,9 @@ pub(crate) fn cached_analysis_request(
9679
// we know the runtime's v8::Script cache cannot change out from under us.
9780
// Thus, there is no potential race in the time after clearing the cache
9881
// but before actually executing the JavaScript rule).
99-
if cached.is_update {
100-
// The runtime's `v8::Script` cache is keyed only by name -- we need to manually clear it.
101-
runtime.clear_rule_cache(&cached.rule.inner().name);
102-
}
103-
compiled_rules.push(cached.rule);
82+
let internal = cached.as_internal();
83+
let _ = runtime.evict_script_if_stale(&internal.name, &internal.code);
84+
compiled_rules.push(cached);
10485
}
10586
let rule_internals = compiled_rules
10687
.iter()
@@ -120,8 +101,7 @@ pub(crate) fn cached_analysis_request(
120101
let mut rule_internals = Vec::with_capacity(request.rules.len());
121102
for server_rule in request.rules {
122103
let rule_internal = RuleInternal::try_from(server_rule)?;
123-
// The runtime's `v8::Script` cache is keyed only by name -- we need to manually clear it.
124-
runtime.clear_rule_cache(&rule_internal.name);
104+
let _ = runtime.evict_script_if_stale(&rule_internal.name, &rule_internal.code);
125105
rule_internals.push(rule_internal);
126106
}
127107
let req_with_internal: AnalysisRequest<RuleInternal> = AnalysisRequest {
@@ -144,54 +124,54 @@ mod tests {
144124
use kernel::model::common::Language;
145125
use kernel::model::rule::{compute_sha256, RuleCategory, RuleSeverity, RuleType};
146126
use kernel::utils::encode_base64_string;
147-
use server::model::analysis_request::{AnalysisRequest, ServerRule};
127+
use server::model::analysis_request::{AnalysisRequest, AnalysisRequestOptions, ServerRule};
148128

149-
/// Tests that `cached_analysis_request` implements the (optional) cache properly.
150-
/// When enabled, this requires:
151-
/// * Updating the `RuleCache` cache
152-
/// * Updating the thread-local JavaScript runtime's `v8::Script` cache
129+
fn request_from(
130+
rule_name: &str,
131+
language: Language,
132+
rule: (&str, &str),
133+
) -> AnalysisRequest<ServerRule> {
134+
let file_contents = "
135+
def abc():
136+
";
137+
let code_base64 = encode_base64_string(rule.0.to_string());
138+
let checksum = Some(compute_sha256(code_base64.as_bytes()));
139+
let ts_query_b64 = encode_base64_string(rule.1.to_string());
140+
let server_rule = ServerRule {
141+
name: rule_name.to_string(),
142+
short_description_base64: None,
143+
description_base64: None,
144+
category: Some(RuleCategory::BestPractices),
145+
severity: Some(RuleSeverity::Warning),
146+
language,
147+
rule_type: RuleType::TreeSitterQuery,
148+
entity_checked: None,
149+
code_base64,
150+
checksum,
151+
pattern: None,
152+
tree_sitter_query_base64: Some(ts_query_b64),
153+
arguments: vec![],
154+
};
155+
AnalysisRequest {
156+
filename: "file.py".to_string(),
157+
language: Language::Python,
158+
file_encoding: "utf-8".to_string(),
159+
code_base64: encode_base64_string(file_contents.to_string()),
160+
rules: vec![server_rule],
161+
configuration_base64: None,
162+
options: Some(AnalysisRequestOptions {
163+
log_output: Some(true),
164+
use_tree_sitter: None,
165+
}),
166+
}
167+
}
168+
169+
/// `cached_analysis_request` operates [RuleCache] properly.
153170
#[test]
154171
fn test_cached_analysis_request() {
155172
const RULE_NAME: &str = "ruleset/rule-name";
156173
let v8 = ddsa_lib::test_utils::cfg_test_v8();
157174

158-
fn request_from(
159-
rule_name: &str,
160-
language: Language,
161-
rule: (&str, &str),
162-
) -> AnalysisRequest<ServerRule> {
163-
let file_contents = "
164-
def abc():
165-
";
166-
let code_base64 = encode_base64_string(rule.0.to_string());
167-
let checksum = Some(compute_sha256(code_base64.as_bytes()));
168-
let ts_query_b64 = encode_base64_string(rule.1.to_string());
169-
let server_rule = ServerRule {
170-
name: rule_name.to_string(),
171-
short_description_base64: None,
172-
description_base64: None,
173-
category: Some(RuleCategory::BestPractices),
174-
severity: Some(RuleSeverity::Warning),
175-
language,
176-
rule_type: RuleType::TreeSitterQuery,
177-
entity_checked: None,
178-
code_base64,
179-
checksum,
180-
pattern: None,
181-
tree_sitter_query_base64: Some(ts_query_b64),
182-
arguments: vec![],
183-
};
184-
AnalysisRequest {
185-
filename: "file.py".to_string(),
186-
language: Language::Python,
187-
file_encoding: "utf-8".to_string(),
188-
code_base64: encode_base64_string(file_contents.to_string()),
189-
rules: vec![server_rule],
190-
configuration_base64: None,
191-
options: None,
192-
}
193-
}
194-
195175
let req_v1 = request_from(
196176
RULE_NAME,
197177
Language::Python,
@@ -255,4 +235,52 @@ function visit(captures) {
255235
}
256236
}
257237
}
238+
239+
/// `cached_analysis_request` operates the v8::UnboundScript cache of JSRuntime.
240+
#[test]
241+
fn test_cached_analysis_request_runtime_script_cache() {
242+
const RULE_NAME: &str = "ruleset/rule-name";
243+
let v8 = ddsa_lib::test_utils::cfg_test_v8();
244+
let cache = RuleCache::new();
245+
246+
let req_v1 = request_from(
247+
RULE_NAME,
248+
Language::Python,
249+
(
250+
// language=javascript
251+
"function visit(captures) { console.log('rule_v1'); }",
252+
"(function_definition) @func",
253+
),
254+
);
255+
let req_v2 = request_from(
256+
RULE_NAME,
257+
Language::Python,
258+
(
259+
// language=javascript
260+
"function visit(captures) { console.log('rule_v2'); }",
261+
"(function_definition) @func",
262+
),
263+
);
264+
265+
// Test invariants:
266+
// Tests equality of rule name, which triggers a cache collision.
267+
assert_eq!(req_v1.rules[0].name, req_v2.rules[0].name);
268+
// Tests the clearing of `v8::Script` cache
269+
assert_ne!(req_v1.rules[0].code_base64, req_v2.rules[0].code_base64);
270+
271+
// Two runtimes to simulate two threads. Thus, there are two v8::UnboundScript caches, but the entrypoint is the same `RuleCache`
272+
let mut rt_a = v8.new_runtime();
273+
let mut rt_b = v8.new_runtime();
274+
275+
let resp = cached_analysis_request(&mut rt_a, req_v1.clone(), None, Some(&cache)).unwrap();
276+
assert_eq!(resp[0].output.as_ref().unwrap(), "rule_v1");
277+
278+
let resp = cached_analysis_request(&mut rt_b, req_v2.clone(), None, Some(&cache)).unwrap();
279+
assert_eq!(resp[0].output.as_ref().unwrap(), "rule_v2");
280+
281+
// `rt_a` still contains a v8::UnboundScript for `req_v1`.
282+
// `cached_analysis_request` must correctly instruct `rt_a` to clear its cache for `RULE_NAME`.
283+
let resp = cached_analysis_request(&mut rt_a, req_v2.clone(), None, Some(&cache)).unwrap();
284+
assert_eq!(resp[0].output.as_ref().unwrap(), "rule_v2");
285+
}
258286
}

crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -214,15 +214,6 @@ impl JsRuntime {
214214
})
215215
}
216216

217-
/// Clears the cache for the given rule name, returning `true` if a script
218-
/// existed and was removed from the cache, or `false` if it didn't exist.
219-
///
220-
/// # Panics
221-
/// Panics if the `rule_cache` has an existing borrow.
222-
pub fn clear_rule_cache(&self, rule_name: &str) -> bool {
223-
self.rule_cache.borrow_mut().remove(rule_name).is_some()
224-
}
225-
226217
/// Drops the cached [v8::UnboundScript] for the given `rule_name` if its `rule_code` doesn't match the provided.
227218
/// Returns `true` if an entry was evicted, or `false` if not.
228219
///

0 commit comments

Comments
 (0)