Skip to content

Commit

Permalink
added path allow list (modified format)
Browse files Browse the repository at this point in the history
  • Loading branch information
cutler-scott-newrelic committed Aug 11, 2020
1 parent 000a7bb commit 629d326
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 57 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
- includes a default allowlist when none is specified
- if the pattern name "<GLOBAL>" is used it will be checked against all patterns
- moved the allowlist code into lib.rs so that all hogs will use it by default

- included a new format for allowlists that includes checks for paths as well
33 changes: 24 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -392,22 +392,37 @@ As of version 1.0.5, the current default regex JSON used is as follows:

## Allowlist JSON file format

Some of the scanners provide an allowlist feature. This allows you to specify an allowlist file that identifies exceptions
to each regex pattern that should be excluded from the final output.
Scanners provide an allowlist feature. This allows you to specify a list of regular expressions for each pattern that
will be ignored by the scanner. You can now optionally supply a list of regular expressions that are evaluated against
the file path as well.

The format for this allowlist file should be a single json object. Each key in the allowlist should match a key in the
regex json, and the value should be an array of strings that are exceptions for that regex pattern. For example:
regex json, and the value can be one of two things:
1) An array of strings that are exceptions for that regex pattern. For example: `["(?i)example"]`
2) An object with a required `patterns` key and an optional `paths` key, each holding an array of regular expressions.

In addition, you can specify the key `<GLOBAL>` which is evaluated against all patterns.

The following is the default allowlist included in all scans:


```json
{
"Email address": [
"[email protected]",
"admin@mail.com"
"Email address": {
"patterns": [
"(?i).*@newrelic.com"
],
"New Relic Account IDs in URL": [
"newrelic.com/accounts/some-unoffensive-account-number",
"newrelic.com/accounts/an-account-that-doesn't-exist-like-this-one",
"paths": [
"(?i)authors",
"(?i)contributors",
"(?i)license",
"(?i)maintainers",
"(?i)third_party_notices"
]
},
"<GLOBAL>": [
"(?i)example"
]
}
```

Expand Down
2 changes: 2 additions & 0 deletions src/bin/duroc_hog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ fn run(arg_matches: &ArgMatches) -> Result<(), SimpleError> {
output.extend(scan_file(fspath, &secret_scanner, f, "", unzip));
}

let output: HashSet<FileFinding> = output.into_iter().filter(|ff| !secret_scanner.is_allowlisted_path(&ff.reason, ff.path.as_bytes())).collect();

info!("Found {} secrets", output.len());
match secret_scanner.output_findings(&output) {
Ok(_) => Ok(()),
Expand Down
21 changes: 16 additions & 5 deletions src/default_allowlist.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
{
"Email address":[
".*@newrelic.com"
],
"<GLOBAL>": ["(?i)example"]
}
"Email address": {
"patterns": [
"(?i).*@newrelic.com"
],
"paths": [
"(?i)authors",
"(?i)contributors",
"(?i)license",
"(?i)maintainers",
"(?i)third_party_notices"
]
},
"<GLOBAL>": [
"(?i)example"
]
}
14 changes: 5 additions & 9 deletions src/git_scanning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,10 @@ impl GitScanner {
);
}
if !secrets.is_empty() {
let create_finding = self.secret_scanner.check_entropy(&reason, new_line);
if create_finding {
let path = delta.new_file().path().unwrap().to_str().unwrap().to_string();
let enough_entropy = self.secret_scanner.check_entropy(&reason, new_line);
let valid_path = !self.secret_scanner.is_allowlisted_path(&reason, path.as_bytes());
if enough_entropy && valid_path {
findings.insert(GitFinding {
commit_hash: commit.id().to_string(),
commit: commit.message().unwrap().to_string(),
Expand All @@ -234,13 +236,7 @@ impl GitScanner {
date: NaiveDateTime::from_timestamp(commit.time().seconds(), 0)
.to_string(),
strings_found: secrets.clone(),
path: delta
.new_file()
.path()
.unwrap()
.to_str()
.unwrap()
.to_string(),
path,
reason: reason.clone(),
old_file_id: old_file_id.to_string(),
new_file_id: new_file_id.to_string(),
Expand Down
93 changes: 60 additions & 33 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ const ENTROPY_MAX_WORD_LEN: usize = 40;
#[derive(Debug, Clone)]
pub struct SecretScanner {
pub regex_map: BTreeMap<String, EntropyRegex>,
pub allowlist_map: BTreeMap<String, Vec<Regex>>,
pub allowlist_map: BTreeMap<String, AllowList>,
pub pretty_print: bool,
pub output_path: Option<String>,
pub entropy_min_word_len: usize,
Expand Down Expand Up @@ -255,6 +255,22 @@ pub enum PatternEntropy {
},
}

/// Deserialized form of one allowlist entry, i.e. the value stored under a single
/// key of the allowlist JSON object.
///
/// The enum is `#[serde(untagged)]`, so the JSON carries no tag field; serde tries
/// the variants in declaration order and keeps the first one that deserializes.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
pub enum AllowListEnum {
/// A bare JSON array of regex strings (entry has patterns only, no paths).
PatternList(Vec<String>),
/// A JSON object with a `patterns` array and an optional `paths` array.
AllowListJson {
/// Regex sources that become the entry's pattern list.
patterns: Vec<String>,
/// Optional regex sources that become the entry's path list.
paths: Option<Vec<String>>
}
}

/// Compiled allowlist entry: one set of regexes for matched tokens and one for file paths.
#[derive(Debug, Clone)]
pub struct AllowList {
/// Regexes tested against a matched token (see `is_allowlisted_pattern`).
pub pattern_list: Vec<Regex>,
/// Regexes tested against a file path (see `is_allowlisted_path`); empty when the
/// JSON entry supplied no `paths`.
pub path_list: Vec<Regex>
}

/// Used to instantiate the `SecretScanner` object with user-supplied options
///
/// Use the `new()` function to create a builder object, perform configurations as needed, then
Expand Down Expand Up @@ -630,32 +646,40 @@ impl SecretScannerBuilder {
.collect()
}

fn build_allowlist_from_str(input: &str) -> Result<BTreeMap<String, Vec<Regex>>, SimpleError> {
fn vec_string_to_vec_regex(incoming_array: Vec<String>) -> Vec<Regex> {
incoming_array
.into_iter()
.filter_map(|x| match Regex::new(&x) {
Ok(r) => Some(r),
Err(e) => {
error!("Failed to parse regex: {}", e);
None
}
})
.collect()
}

fn build_allowlist_from_str(input: &str) -> Result<BTreeMap<String, AllowList>, SimpleError> {
info!("Attempting to parse JSON allowlist string");
let allowlist: BTreeMap<String, Value> = match serde_json::from_str(input) {
let allowlist: BTreeMap<String, AllowListEnum> = match serde_json::from_str(input) {
Ok(m) => Ok(m),
Err(e) => Err(SimpleError::with("Failed to parse allowlist JSON", e)),
}?;
allowlist
.into_iter()
.map(|(p, list)| match list {
Value::Array(v) => {
let l = v
.into_iter()
.filter_map(|v| match v {
Value::String(s) => match Regex::new(&s) {
Ok(r) => Some(r),
Err(e) => {
error!("Failed to parse regex in allowlist JSON: {}", e);
None
}
},
_ => None,
})
.collect();
Ok((p, l))
.map(|(p, allowlistobj)| match allowlistobj {
AllowListEnum::PatternList(v) => {
let l = SecretScannerBuilder::vec_string_to_vec_regex(v);
Ok((p, AllowList { pattern_list: l, path_list: vec![] }))
}
AllowListEnum::AllowListJson { patterns: pattern_list, paths: path_list } => {
let l1 = SecretScannerBuilder::vec_string_to_vec_regex(pattern_list);
let l2 = match path_list {
Some(v) => SecretScannerBuilder::vec_string_to_vec_regex(v),
None => Vec::new()
};
Ok((p, AllowList { pattern_list: l1, path_list: l2 }))
}
_ => Err(SimpleError::new("Invalid allowlist JSON format")),
})
.collect()
}
Expand Down Expand Up @@ -696,7 +720,7 @@ impl SecretScanner {
let matches = x.1.pattern.find_iter(line);
let matches_filtered: Vec<RustyHogMatch> = matches
.filter(|m| self.check_entropy(x.0, &line[m.start()..m.end()]))
.filter(|m| !self.is_allowlisted(x.0, &line[m.start()..m.end()]))
.filter(|m| !self.is_allowlisted_pattern(x.0, &line[m.start()..m.end()]))
.map(RustyHogMatch::from)
.inspect(|x| debug!("RustyHogMatch: {:?}", x))
.collect();
Expand Down Expand Up @@ -947,21 +971,24 @@ impl SecretScanner {
Ok(())
}

/// Checks if any of the provided tokens is allowlisted
pub fn is_allowlisted(&self, pattern: &str, token: &[u8]) -> bool {
/// Checks if the provided path name is allowlisted
pub fn is_allowlisted_path(&self, pattern: &str, path: &[u8]) -> bool {
if let Some(allowlist) = self.allowlist_map.get(pattern) {
for allow_regex in allowlist {
if allow_regex.find(token).is_some() {
return true;
}
}
if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true }
}
if let Some(allowlist) = self.allowlist_map.get("<GLOBAL>") {
for allow_regex in allowlist {
if allow_regex.find(token).is_some() {
return true;
}
}
if allowlist.path_list.iter().any(|x| x.find(path).is_some()) { return true }
}
false
}

/// Reports whether `token` is covered by the allowlist.
///
/// The entry registered under `pattern` is consulted first, then the entry
/// registered under `"<GLOBAL>"`. Returns `true` as soon as any regex in
/// either entry's `pattern_list` finds a match in `token`, and `false` when
/// neither entry exists or nothing matches.
pub fn is_allowlisted_pattern(&self, pattern: &str, token: &[u8]) -> bool {
    [pattern, "<GLOBAL>"]
        .iter()
        // Keys without an allowlist entry are simply skipped.
        .filter_map(|key| self.allowlist_map.get(*key))
        .any(|entry| {
            entry
                .pattern_list
                .iter()
                .any(|regex| regex.find(token).is_some())
        })
}
Expand Down

0 comments on commit 629d326

Please sign in to comment.