Skip to content

Commit e495f67

Browse files
committed
Use v2 ConfigFile data structures to power CLI and Git hook
1 parent ff4df07 commit e495f67

10 files changed

Lines changed: 218 additions & 124 deletions

File tree

crates/bins/src/bin/datadog-static-analyzer-git-hook.rs

Lines changed: 51 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,12 @@ use cli::config_file::get_config;
33
use cli::constants::{
44
DEFAULT_MAX_CPUS, DEFAULT_MAX_FILE_SIZE_KB, EXIT_CODE_GITHOOK_FAILED,
55
EXIT_CODE_INVALID_CONFIGURATION, EXIT_CODE_INVALID_DIRECTORY, EXIT_CODE_NO_DIRECTORY,
6-
EXIT_CODE_NO_SECRET_OR_STATIC_ANALYSIS, EXIT_CODE_RULE_CHECKSUM_INVALID,
7-
EXIT_CODE_SHA_OR_DEFAULT_BRANCH,
6+
EXIT_CODE_NO_SECRET_OR_STATIC_ANALYSIS, EXIT_CODE_RULESET_NOT_FOUND,
7+
EXIT_CODE_RULE_CHECKSUM_INVALID, EXIT_CODE_SHA_OR_DEFAULT_BRANCH,
8+
};
9+
use cli::datadog_utils::{
10+
get_all_default_rulesets, get_rules_from_rulesets, get_secrets_rules, DatadogApiError,
811
};
9-
use cli::datadog_utils::{get_all_default_rulesets, get_rules_from_rulesets, get_secrets_rules};
1012
use cli::file_utils::{filter_files_by_size, get_files, read_files_from_gitignore};
1113
use cli::git_utils::{
1214
get_changed_files_between_shas, get_changed_files_with_branch, get_default_branch,
@@ -24,7 +26,7 @@ use itertools::Itertools;
2426
use kernel::analysis::ddsa_lib::v8_platform::{initialize_v8, Initialized, V8Platform};
2527
use kernel::classifiers::ArtifactClassification;
2628
use kernel::config::common::{ConfigMethod, PathConfig};
27-
use kernel::config::file_v1;
29+
use kernel::config::file_v2;
2830
use kernel::constants::{CARGO_VERSION, VERSION};
2931
use kernel::model::common::OutputFormat::Json;
3032
use kernel::model::rule::{Rule, RuleResult};
@@ -255,7 +257,7 @@ fn main() -> Result<()> {
255257
let configuration_file_and_method = get_config(directory_to_analyze.as_str(), use_debug);
256258

257259
let (configuration_file, configuration_method): (
258-
Option<file_v1::ConfigFile>,
260+
Option<file_v2::ConfigFile>,
259261
Option<ConfigMethod>,
260262
) = match configuration_file_and_method {
261263
Ok(cfg) => match cfg {
@@ -276,29 +278,57 @@ fn main() -> Result<()> {
276278
.map(RuleConfigProvider::from_config)
277279
.unwrap_or_default();
278280

281+
// A list of rulesets that were fetched due to being specifically listed in a ConfigFile::use_rulesets list.
282+
let mut fetched_rulesets = Vec::<&str>::new();
279283
// if there is a configuration file, we load the rules from it. But it means
280284
// we cannot have the rule parameter given.
281-
if let Some(conf) = configuration_file {
282-
ignore_gitignore = conf.ignore_gitignore.unwrap_or(false);
285+
if let Some(conf) = &configuration_file {
286+
ignore_gitignore = conf
287+
.global_config
288+
.as_ref()
289+
.and_then(|g| g.use_gitignore.map(|b| !b))
290+
.unwrap_or(false);
283291

284292
if static_analysis_enabled {
285-
let rulesets = conf.rulesets.keys().cloned().collect_vec();
286-
let rules_from_api = get_rules_from_rulesets(&rulesets, use_staging, use_debug)
287-
.context("error when reading rules from API")?;
288-
rules.extend(rules_from_api);
293+
if let Some(rulesets) = &conf.use_rulesets {
294+
let rules_from_api = get_rules_from_rulesets(rulesets, use_staging, use_debug)
295+
.inspect_err(|e| {
296+
if let DatadogApiError::RulesetNotFound(rs) = e {
297+
eprintln!("Error: ruleset {rs} not found");
298+
exit(EXIT_CODE_RULESET_NOT_FOUND);
299+
}
300+
})
301+
.context("error when reading rules from API")?;
302+
rules.extend(rules_from_api);
303+
for r in rulesets {
304+
fetched_rulesets.push(r.as_str());
305+
}
306+
}
289307
}
290308

291309
// copy the only and ignore paths from the configuration file
292-
path_config.ignore.extend(conf.paths.ignore);
293-
path_config.only = conf.paths.only;
310+
if let Some(pc) = conf.global_config.as_ref().and_then(|g| g.paths.as_ref()) {
311+
path_config.ignore.extend_from_slice(&pc.ignore);
312+
path_config.only = pc.only.clone();
313+
}
294314

295315
// Get the max file size from the configuration or default to the default constant.
296-
max_file_size_kb = conf.max_file_size_kb.unwrap_or(DEFAULT_MAX_FILE_SIZE_KB);
297-
ignore_generated_files = conf.ignore_generated_files.unwrap_or(true);
298-
} else {
316+
max_file_size_kb = conf
317+
.global_config
318+
.as_ref()
319+
.and_then(|g| g.max_file_size_kb)
320+
.unwrap_or(DEFAULT_MAX_FILE_SIZE_KB);
321+
ignore_generated_files = conf
322+
.global_config
323+
.as_ref()
324+
.and_then(|g| g.ignore_generated_files)
325+
.unwrap_or(true);
326+
}
327+
328+
if static_analysis_enabled {
299329
// if there is no config file, we take the default rules from our APIs.
300330

301-
if use_debug {
331+
if configuration_file.is_none() && use_debug {
302332
println!("WARNING: no configuration file detected, getting the default rules from the Datadog API");
303333
println!("Check the following resources to configure your rules:");
304334
println!(
@@ -307,10 +337,11 @@ fn main() -> Result<()> {
307337
println!(" - Static analyzer repository on GitHub: https://github.com/DataDog/datadog-static-analyzer");
308338
}
309339

310-
if static_analysis_enabled {
340+
let should_fetch = !matches!(&configuration_file, Some(config) if config.use_default_rulesets == Some(false));
341+
if should_fetch {
311342
let rulesets_from_api =
312-
get_all_default_rulesets(use_staging, use_debug).expect("cannot get default rules");
313-
343+
get_all_default_rulesets(use_staging, use_debug, &fetched_rulesets)
344+
.context("cannot get default rules")?;
314345
rules.extend(rulesets_from_api.into_iter().flat_map(|rs| rs.into_rules()));
315346
}
316347
}

crates/bins/src/bin/datadog-static-analyzer.rs

Lines changed: 56 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use kernel::analysis::ddsa_lib::v8_platform::{initialize_v8, Initialized, V8Plat
3535
use kernel::analysis::generated_content::DEFAULT_IGNORED_GLOBS;
3636
use kernel::classifiers::ArtifactClassification;
3737
use kernel::config::common::{ConfigMethod, PathConfig};
38-
use kernel::config::file_v1;
38+
use kernel::config::file_v2;
3939
use kernel::constants::{CARGO_VERSION, VERSION};
4040
use kernel::model::common::OutputFormat;
4141
use kernel::model::rule::{Rule, RuleSeverity};
@@ -264,7 +264,7 @@ fn main() -> Result<()> {
264264
let configuration_file_and_method = get_config(directory_to_analyze.as_str(), use_debug);
265265

266266
let (configuration_file, configuration_method): (
267-
Option<file_v1::ConfigFile>,
267+
Option<file_v2::ConfigFile>,
268268
Option<ConfigMethod>,
269269
) = match configuration_file_and_method {
270270
Ok(cfg) => match cfg {
@@ -290,47 +290,74 @@ fn main() -> Result<()> {
290290
.unwrap_or_default();
291291
let mut rules: Vec<Rule> = Vec::new();
292292

293+
// A list of rulesets that were fetched due to being specifically listed in a ConfigFile::use_rulesets list.
294+
let mut fetched_rulesets = Vec::<&str>::new();
293295
// if there is a configuration file, we load the rules from it. But it means
294296
// we cannot have the rule parameter given.
295-
if let Some(conf) = configuration_file {
296-
ignore_gitignore = conf.ignore_gitignore.unwrap_or(false);
297+
if let Some(conf) = &configuration_file {
298+
ignore_gitignore = conf
299+
.global_config
300+
.as_ref()
301+
.and_then(|g| g.use_gitignore.map(|b| !b))
302+
.unwrap_or(false);
297303
if rules_file.is_some() {
298304
eprintln!("a rule file cannot be specified when a configuration file is present.");
299305
exit(EXIT_CODE_RULE_FILE_WITH_CONFIGURATION);
300306
}
301307

302308
if static_analysis_enabled {
303-
let rulesets = conf.rulesets.keys().cloned().collect_vec();
304-
let rules_from_api = get_rules_from_rulesets(&rulesets, use_staging, use_debug)
305-
.inspect_err(|e| {
306-
if let DatadogApiError::RulesetNotFound(rs) = e {
307-
eprintln!("Error: ruleset {rs} not found");
308-
exit(EXIT_CODE_RULESET_NOT_FOUND);
309-
}
310-
})
311-
.context("error when reading rules from API")?;
312-
rules.extend(rules_from_api);
309+
if let Some(rulesets) = &conf.use_rulesets {
310+
let rules_from_api = get_rules_from_rulesets(rulesets, use_staging, use_debug)
311+
.inspect_err(|e| {
312+
if let DatadogApiError::RulesetNotFound(rs) = e {
313+
eprintln!("Error: ruleset {rs} not found");
314+
exit(EXIT_CODE_RULESET_NOT_FOUND);
315+
}
316+
})
317+
.context("error when reading rules from API")?;
318+
rules.extend(rules_from_api);
319+
for r in rulesets {
320+
fetched_rulesets.push(r.as_str());
321+
}
322+
}
313323
}
314324
// copy the only and ignore paths from the configuration file
315-
path_config.ignore.extend(conf.paths.ignore);
316-
path_config.only = conf.paths.only;
325+
if let Some(pc) = conf.global_config.as_ref().and_then(|g| g.paths.as_ref()) {
326+
path_config.ignore.extend_from_slice(&pc.ignore);
327+
path_config.only = pc.only.clone();
328+
}
317329

318330
// Get the max file size from the configuration or default to the default constant.
319-
max_file_size_kb = conf.max_file_size_kb.unwrap_or(DEFAULT_MAX_FILE_SIZE_KB);
320-
ignore_generated_files = conf.ignore_generated_files.unwrap_or(true);
321-
} else if static_analysis_enabled {
331+
max_file_size_kb = conf
332+
.global_config
333+
.as_ref()
334+
.and_then(|g| g.max_file_size_kb)
335+
.unwrap_or(DEFAULT_MAX_FILE_SIZE_KB);
336+
ignore_generated_files = conf
337+
.global_config
338+
.as_ref()
339+
.and_then(|g| g.ignore_generated_files)
340+
.unwrap_or(true);
341+
}
342+
343+
if static_analysis_enabled {
322344
// if there is no config file, we take the default rules from our APIs.
323345
if rules_file.is_none() {
324-
println!("WARNING: no configuration file detected, getting the default rules from the Datadog API");
325-
println!("Check the following resources to configure your rules:");
326-
println!(
327-
" - Datadog documentation: https://docs.datadoghq.com/code_analysis/static_analysis"
328-
);
329-
println!(" - Static analyzer repository on GitHub: https://github.com/DataDog/datadog-static-analyzer");
330-
let rulesets_from_api =
331-
get_all_default_rulesets(use_staging, use_debug).expect("cannot get default rules");
332-
333-
rules.extend(rulesets_from_api.into_iter().flat_map(|rs| rs.into_rules()));
346+
if configuration_file.is_none() {
347+
println!("WARNING: no configuration file detected, getting the default rules from the Datadog API");
348+
println!("Check the following resources to configure your rules:");
349+
println!(
350+
" - Datadog documentation: https://docs.datadoghq.com/code_analysis/static_analysis"
351+
);
352+
println!(" - Static analyzer repository on GitHub: https://github.com/DataDog/datadog-static-analyzer");
353+
}
354+
let should_fetch = !matches!(&configuration_file, Some(config) if config.use_default_rulesets == Some(false));
355+
if should_fetch {
356+
let rulesets_from_api =
357+
get_all_default_rulesets(use_staging, use_debug, &fetched_rulesets)
358+
.context("cannot get default rules")?;
359+
rules.extend(rulesets_from_api.into_iter().flat_map(|rs| rs.into_rules()));
360+
}
334361
} else {
335362
let rulesets_from_file = get_rulesets_from_file(rules_file.clone().unwrap().as_str());
336363
rules.extend(

crates/cli/src/config_file.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ use crate::datadog_utils::{
55
};
66
use crate::git_utils::get_repository_url;
77
use anyhow::{anyhow, Context};
8-
use kernel::config::common::ConfigMethod;
9-
use kernel::config::file_v1;
10-
use kernel::config::file_v1::parse_config_file;
8+
use kernel::config::common::{parse_any_schema_yaml, ConfigMethod, WithVersion};
9+
use kernel::config::file_v2;
1110
use kernel::utils::{decode_base64_string, encode_base64_string};
1211
use std::path::Path;
1312

@@ -48,12 +47,16 @@ pub fn read_config_file(base_path: &str) -> anyhow::Result<Option<String>> {
4847
pub fn get_config(
4948
path: &str,
5049
debug: bool,
51-
) -> anyhow::Result<Option<(file_v1::ConfigFile, ConfigMethod)>> {
50+
) -> anyhow::Result<Option<(file_v2::ConfigFile, ConfigMethod)>> {
5251
let local_file_contents = read_config_file(path)?;
53-
let local_config = local_file_contents
52+
let local_yaml = local_file_contents
5453
.as_ref()
55-
.map(|c| parse_config_file(c))
54+
.map(|c| parse_any_schema_yaml(c))
5655
.transpose()?;
56+
let local_config: Option<file_v2::ConfigFile> = local_yaml.map(|v| match v {
57+
WithVersion::V1(v1) => file_v2::YamlConfigFile::from(v1).into(),
58+
WithVersion::V2(v2) => v2.into(),
59+
});
5760

5861
if !should_use_datadog_backend() {
5962
if debug {
@@ -92,15 +95,19 @@ pub fn get_config(
9295
let text = decode_base64_string(remote_config_base64)
9396
.context("error when decoding base64 remote config")?;
9497

95-
let res = parse_config_file(&text).inspect_err(|err| {
98+
let res = parse_any_schema_yaml(&text).inspect_err(|err| {
9699
if debug {
97100
eprintln!("Error when parsing remote config: {err:?}");
98101
eprintln!("Proceeding with local config");
99102
}
100103
});
101-
let Ok(remote_config) = res else {
104+
let Ok(remote_yaml) = res else {
102105
return Ok(local_config.map(|c| (c, ConfigMethod::File)));
103106
};
107+
let remote_config: file_v2::ConfigFile = match remote_yaml {
108+
WithVersion::V1(v1) => file_v2::YamlConfigFile::from(v1).into(),
109+
WithVersion::V2(v2) => v2.into(),
110+
};
104111

105112
let config_method = if local_config.is_some() {
106113
ConfigMethod::RemoteConfigurationWithFile

crates/cli/src/datadog_utils.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,15 +281,24 @@ pub fn get_default_rulesets_name_for_language(
281281
}
282282

283283
/// Get all the default rulesets available at DataDog. Take all the language
284-
/// from `DEFAULT_RULESETS_LANGAGES` and get their rulesets
285-
pub fn get_all_default_rulesets(use_staging: bool, debug: bool) -> Result<Vec<RuleSet>> {
284+
/// from [`DEFAULT_RULESETS_LANGUAGES`] and get their rulesets
285+
///
286+
/// Any ruleset names present in `excluded` will not be fetched.
287+
pub fn get_all_default_rulesets(
288+
use_staging: bool,
289+
debug: bool,
290+
excluded: &[&str],
291+
) -> Result<Vec<RuleSet>> {
286292
let mut result: Vec<RuleSet> = vec![];
287293

288294
for language in DEFAULT_RULESETS_LANGUAGES {
289295
let ruleset_names =
290296
get_default_rulesets_name_for_language(language.to_string(), use_staging, debug)?;
291297

292-
for ruleset_name in ruleset_names {
298+
for ruleset_name in ruleset_names
299+
.into_iter()
300+
.filter(|name| !excluded.contains(&name.as_str()))
301+
{
293302
result.push(get_ruleset(ruleset_name.as_str(), use_staging, debug)?);
294303
}
295304
}

crates/static-analysis-kernel/src/analysis/analyze.rs

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,8 @@ mod tests {
396396
use super::*;
397397
use crate::analysis::ddsa_lib::test_utils::cfg_test_v8;
398398
use crate::analysis::tree_sitter::get_query;
399-
use crate::config::file_v1::parse_config_file;
399+
use crate::config::common::{parse_any_schema_yaml, WithVersion};
400+
use crate::config::file_v2;
400401
use crate::model::common::Language;
401402
use crate::model::rule::{RuleCategory, RuleSeverity};
402403
use crate::rule_config::RuleConfigProvider;
@@ -1169,10 +1170,9 @@ function visit(node, filename, code) {
11691170
tree_sitter_query: get_query(QUERY_CODE, &Language::Python).unwrap(),
11701171
};
11711172

1172-
let analysis_options = AnalysisOptions::default();
1173-
let rule_config_provider = RuleConfigProvider::from_config(
1174-
&parse_config_file(
1175-
r#"
1173+
let local_config: file_v2::ConfigFile = parse_any_schema_yaml(
1174+
// language=yaml
1175+
r#"
11761176
rulesets:
11771177
- rs:
11781178
rules:
@@ -1181,9 +1181,15 @@ rulesets:
11811181
my-argument: 101
11821182
another-arg: 101
11831183
"#,
1184-
)
1185-
.unwrap(),
1186-
);
1184+
)
1185+
.map(|v| match v {
1186+
WithVersion::V1(v1) => file_v2::YamlConfigFile::from(v1).into(),
1187+
WithVersion::V2(v2) => v2.into(),
1188+
})
1189+
.unwrap();
1190+
1191+
let analysis_options = AnalysisOptions::default();
1192+
let rule_config_provider = RuleConfigProvider::from_config(&local_config);
11871193
let rule_config = rule_config_provider.config_for_file("myfile.py");
11881194

11891195
let results = analyze(
@@ -1291,9 +1297,10 @@ function visit(node, filename, code) {
12911297
ignore_generated_files: false,
12921298
timeout: None,
12931299
};
1294-
let rule_config_provider = RuleConfigProvider::from_config(
1295-
&parse_config_file(
1296-
r#"
1300+
1301+
let local_config: file_v2::ConfigFile = parse_any_schema_yaml(
1302+
// language=yaml
1303+
r#"
12971304
rulesets:
12981305
- rs:
12991306
rules:
@@ -1305,9 +1312,14 @@ rulesets:
13051312
uno: NOTICE
13061313
dos/myfile.py: ERROR
13071314
"#,
1308-
)
1309-
.unwrap(),
1310-
);
1315+
)
1316+
.map(|v| match v {
1317+
WithVersion::V1(v1) => file_v2::YamlConfigFile::from(v1).into(),
1318+
WithVersion::V2(v2) => v2.into(),
1319+
})
1320+
.unwrap();
1321+
1322+
let rule_config_provider = RuleConfigProvider::from_config(&local_config);
13111323
let rules = vec![rule1, rule2];
13121324

13131325
let results = analyze(

0 commit comments

Comments
 (0)