diff --git a/CHANGELOG.md b/CHANGELOG.md index 91a52e9..542fa69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,4 +20,4 @@ - includes a default allowlist when none is specified - if the pattern name "" is used it will be checked against all patterns - moved the allowlist code into lib.rs so that all hogs will use it by default - + - included a new format for allowlists that include checks for paths as well \ No newline at end of file diff --git a/README.md b/README.md index a46a659..4ef18f8 100644 --- a/README.md +++ b/README.md @@ -392,22 +392,37 @@ As of version 1.0.5, the current default regex JSON used is as follows: ## Allowlist JSON file format -Some of the scanners provide a allowlist feature. This allows you to specific a allowlist file that identifies exceptions -to each regex pattern that should be excluded from the final output. +Scanners provide an allowlist feature. This allows you to specify, for each pattern, a list of regular expressions whose +matches will be ignored by the scanner. You can now optionally supply a list of regular expressions that are evaluated against +the file path as well. The format for this allowlist file should be a single json object. Each key in the allowlist should match a key in the -regex json, and the value should be an array of strings that are exceptions for that regex pattern. For example: +regex json, and the value can be one of two things: +1) An array of strings that are exceptions for that regex pattern. +2) An object with a required `patterns` key and an optional `paths` key. + +In addition, you can specify the key `` which is evaluated against all patterns.
+ +The following is the default allowlist included in all scans: + ```json { - "Email address": [ - "username@mail.com", - "admin@mail.com" + "Email address": { + "patterns": [ + "(?i).*@newrelic.com" ], - "New Relic Account IDs in URL": [ - "newrelic.com/accounts/some-unoffensive-account-number", - "newrelic.com/accounts/an-account-that-doesn't-exist-like-this-one", + "paths": [ + "(?i)authors", + "(?i)contributors", + "(?i)license", + "(?i)maintainers", + "(?i)third_party_notices" ] + }, + "": [ + "(?i)example" + ] } ``` diff --git a/src/bin/duroc_hog.rs b/src/bin/duroc_hog.rs index 5ed4ebf..68fe87c 100644 --- a/src/bin/duroc_hog.rs +++ b/src/bin/duroc_hog.rs @@ -128,6 +128,8 @@ fn run(arg_matches: &ArgMatches) -> Result<(), SimpleError> { output.extend(scan_file(fspath, &secret_scanner, f, "", unzip)); } + let output: HashSet = output.into_iter().filter(|ff| !secret_scanner.is_allowlisted_path(&ff.reason, ff.path.as_bytes())).collect(); + info!("Found {} secrets", output.len()); match secret_scanner.output_findings(&output) { Ok(_) => Ok(()), diff --git a/src/default_allowlist.json b/src/default_allowlist.json index aac326a..7afb9ef 100644 --- a/src/default_allowlist.json +++ b/src/default_allowlist.json @@ -1,6 +1,17 @@ { - "Email address":[ - ".*@newrelic.com" - ], - "": ["(?i)example"] -} + "Email address": { + "patterns": [ + "(?i).*@newrelic.com" + ], + "paths": [ + "(?i)authors", + "(?i)contributors", + "(?i)license", + "(?i)maintainers", + "(?i)third_party_notices" + ] + }, + "": [ + "(?i)example" + ] +} \ No newline at end of file diff --git a/src/git_scanning.rs b/src/git_scanning.rs index 4451e32..f233ea0 100644 --- a/src/git_scanning.rs +++ b/src/git_scanning.rs @@ -223,8 +223,10 @@ impl GitScanner { ); } if !secrets.is_empty() { - let create_finding = self.secret_scanner.check_entropy(&reason, new_line); - if create_finding { + let path = delta.new_file().path().unwrap().to_str().unwrap().to_string(); + let enough_entropy = 
self.secret_scanner.check_entropy(&reason, new_line); + let valid_path = !self.secret_scanner.is_allowlisted_path(&reason, path.as_bytes()); + if enough_entropy && valid_path { findings.insert(GitFinding { commit_hash: commit.id().to_string(), commit: commit.message().unwrap().to_string(), @@ -234,13 +236,7 @@ impl GitScanner { date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0) .to_string(), strings_found: secrets.clone(), - path: delta - .new_file() - .path() - .unwrap() - .to_str() - .unwrap() - .to_string(), + path, reason: reason.clone(), old_file_id: old_file_id.to_string(), new_file_id: new_file_id.to_string(), diff --git a/src/lib.rs b/src/lib.rs index 2af0ce0..1cc262a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -217,7 +217,7 @@ const ENTROPY_MAX_WORD_LEN: usize = 40; #[derive(Debug, Clone)] pub struct SecretScanner { pub regex_map: BTreeMap, - pub allowlist_map: BTreeMap>, + pub allowlist_map: BTreeMap, pub pretty_print: bool, pub output_path: Option, pub entropy_min_word_len: usize, @@ -255,6 +255,22 @@ pub enum PatternEntropy { }, } +#[derive(Deserialize, Debug)] +#[serde(untagged)] +pub enum AllowListEnum { + PatternList(Vec), + AllowListJson { + patterns: Vec, + paths: Option> + } +} + +#[derive(Debug, Clone)] +pub struct AllowList { + pub pattern_list: Vec, + pub path_list: Vec +} + /// Used to instantiate the `SecretScanner` object with user-supplied options /// /// Use the `new()` function to create a builder object, perform configurations as needed, then @@ -630,32 +646,40 @@ impl SecretScannerBuilder { .collect() } - fn build_allowlist_from_str(input: &str) -> Result>, SimpleError> { + fn vec_string_to_vec_regex(incoming_array: Vec) -> Vec { + incoming_array + .into_iter() + .filter_map(|x| match Regex::new(&x) { + Ok(r) => Some(r), + Err(e) => { + error!("Failed to parse regex: {}", e); + None + } + }) + .collect() + } + + fn build_allowlist_from_str(input: &str) -> Result, SimpleError> { info!("Attempting to parse JSON allowlist 
string"); - let allowlist: BTreeMap = match serde_json::from_str(input) { + let allowlist: BTreeMap = match serde_json::from_str(input) { Ok(m) => Ok(m), Err(e) => Err(SimpleError::with("Failed to parse allowlist JSON", e)), }?; allowlist .into_iter() - .map(|(p, list)| match list { - Value::Array(v) => { - let l = v - .into_iter() - .filter_map(|v| match v { - Value::String(s) => match Regex::new(&s) { - Ok(r) => Some(r), - Err(e) => { - error!("Failed to parse regex in allowlist JSON: {}", e); - None - } - }, - _ => None, - }) - .collect(); - Ok((p, l)) + .map(|(p, allowlistobj)| match allowlistobj { + AllowListEnum::PatternList(v) => { + let l = SecretScannerBuilder::vec_string_to_vec_regex(v); + Ok((p, AllowList { pattern_list: l, path_list: vec![] })) + } + AllowListEnum::AllowListJson { patterns: pattern_list, paths: path_list } => { + let l1 = SecretScannerBuilder::vec_string_to_vec_regex(pattern_list); + let l2 = match path_list { + Some(v) => SecretScannerBuilder::vec_string_to_vec_regex(v), + None => Vec::new() + }; + Ok((p, AllowList { pattern_list: l1, path_list: l2 })) } - _ => Err(SimpleError::new("Invalid allowlist JSON format")), }) .collect() } @@ -696,7 +720,7 @@ impl SecretScanner { let matches = x.1.pattern.find_iter(line); let matches_filtered: Vec = matches .filter(|m| self.check_entropy(x.0, &line[m.start()..m.end()])) - .filter(|m| !self.is_allowlisted(x.0, &line[m.start()..m.end()])) + .filter(|m| !self.is_allowlisted_pattern(x.0, &line[m.start()..m.end()])) .map(RustyHogMatch::from) .inspect(|x| debug!("RustyHogMatch: {:?}", x)) .collect(); @@ -947,21 +971,24 @@ impl SecretScanner { Ok(()) } - /// Checks if any of the provided tokens is allowlisted - pub fn is_allowlisted(&self, pattern: &str, token: &[u8]) -> bool { + /// Checks if the provided path name is allowlisted + pub fn is_allowlisted_path(&self, pattern: &str, path: &[u8]) -> bool { if let Some(allowlist) = self.allowlist_map.get(pattern) { - for allow_regex in allowlist { - if 
allow_regex.find(token).is_some() { - return true; - } - } + if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true } } if let Some(allowlist) = self.allowlist_map.get("") { - for allow_regex in allowlist { - if allow_regex.find(token).is_some() { - return true; - } - } + if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true } + } + false + } + + /// Checks if the provided token is allowlisted + pub fn is_allowlisted_pattern(&self, pattern: &str, token: &[u8]) -> bool { + if let Some(allowlist) = self.allowlist_map.get(pattern) { + if allowlist.pattern_list.iter().any(|x| x.find(token).is_some()) { return true } + } + if let Some(allowlist) = self.allowlist_map.get("") { + if allowlist.pattern_list.iter().any(|x| x.find(token).is_some()) { return true } } false }