From 3d1903fbdd4ae859a7359350f9bf1963b6c317a7 Mon Sep 17 00:00:00 2001 From: Sanket Saurav Date: Wed, 7 Aug 2019 12:55:54 +0530 Subject: [PATCH] Fix some bug risks and code quality issues Changes: - In `gatherers/gathererabc.py`, `utils/scan_utils.py` and `utils/utils.py`, fix dangerous default argument (mutables) from being passed in methods - In `scanners/sslyze.py`, fix an instance of unnecessary `len()` call being used to check for emptiness of an iterable - In `gatherers/censys.py`, `gatherers/rdns.py` and `utils/scan_utils.py`, make logging calls to lazily evaluate strings Also added a .deepsource.toml configuration file to run Continuous Quality analysis on the repo. --- .deepsource.toml | 19 +++++++++++++++++++ gatherers/censys.py | 2 +- gatherers/gathererabc.py | 4 +++- gatherers/rdns.py | 2 +- scanners/sslyze.py | 4 ++-- utils/scan_utils.py | 13 +++++++++---- utils/utils.py | 4 +++- 7 files changed, 38 insertions(+), 10 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000..b7a669d8 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,19 @@ +version = 1 + +test_patterns = [ + 'tests/**' +] + +exclude_patterns = [ + 'docs/**', + 'lambda/**' +] + +[[analyzers]] + name = 'python' + enabled = true + runtime_version = '3.x.x' + + [analyzers.meta] + max_line_length = 100 + skip_doc_coverage = ["module", "magic", "class"] diff --git a/gatherers/censys.py b/gatherers/censys.py index 205257dd..e60e0c8a 100644 --- a/gatherers/censys.py +++ b/gatherers/censys.py @@ -61,7 +61,7 @@ def gather(self): # Construct the query. query = query_for(self.suffixes) - logging.debug("Censys query:\n%s\n" % query) + logging.debug("Censys query:\n%s\n", query) # Plan to store in cache/censys/export.csv. download_path = utils.cache_path( diff --git a/gatherers/gathererabc.py b/gatherers/gathererabc.py index 47762627..fa0ac476 100644 --- a/gatherers/gathererabc.py +++ b/gatherers/gathererabc.py @@ -5,7 +5,9 @@ class Gatherer(metaclass=ABCMeta): - def __init__(self, suffixes: List[str], options: dict, extra: dict={}): + def __init__(self, suffixes: List[str], options: dict, extra: dict = None): + if extra is None: + extra = {} self.suffixes = suffixes self.options = options self.extra = extra diff --git a/gatherers/rdns.py b/gatherers/rdns.py index 5cf17da9..00bdc38e 100644 --- a/gatherers/rdns.py +++ b/gatherers/rdns.py @@ -42,7 +42,7 @@ def gather(self): exit(1) with open(path) as lines: - logging.debug("\tReading %s..." % path) + logging.debug("\tReading %s...", path) for record in process_lines(lines, ip_filter, number_filter): yield record diff --git a/scanners/sslyze.py b/scanners/sslyze.py index c2d1e573..25a06c04 100644 --- a/scanners/sslyze.py +++ b/scanners/sslyze.py @@ -353,7 +353,7 @@ def analyze_protocols_and_ciphers(data, sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, t ) data['ciphers'] = [cipher.name for cipher in accepted_ciphers] - if len(accepted_ciphers) > 0: + if accepted_ciphers: # Look at accepted cipher suites for RC4 or DHE. # This is imperfect, as the advertising of RC4 could discriminate based on client. # DHE and ECDHE may not remain the only forward secret options for TLS. @@ -526,7 +526,7 @@ def analyze_certs(certs): if oid in apple_ev: browsers.append("Apple") - if len(browsers) > 0: + if browsers: data['certs']['ev']['trusted'] = True # Log each new OID we observe as marked for EV. diff --git a/utils/scan_utils.py b/utils/scan_utils.py index f441bdf7..6aa4c0d7 100644 --- a/utils/scan_utils.py +++ b/utils/scan_utils.py @@ -113,8 +113,10 @@ def format_last_exception(): # Command Line Conveniences # -def scan(command: List[str], env: dict=None, - allowed_return_codes: list=[]) -> Union[str, None]: +def scan(command: List[str], env: dict = None, + allowed_return_codes: list = None) -> Union[str, None]: + if allowed_return_codes is None: + allowed_return_codes = [] try: response = subprocess.check_output( command, @@ -180,7 +182,7 @@ def configure_logging(options: Union[dict, None]=None) -> None: # This loads the whole thing into memory: it's not a great solution for # super-large lists of domains. def sort_csv(input_filename): - logging.warning("Sorting %s..." % input_filename) + logging.warning("Sorting %s...", input_filename) input_file = open(input_filename, encoding='utf-8', newline='') tmp_filename = "%s.tmp" % input_filename @@ -221,7 +223,10 @@ def sort_csv(input_filename): shutil.move(tmp_filename, input_filename) -def write_rows(rows, domain, base_domain, scanner, csv_writer, meta={}): +def write_rows(rows, domain, base_domain, scanner, csv_writer, meta=None): + + if meta is None: + meta = {} # If we didn't get any info, we'll still output information about why the scan failed. if rows is None: diff --git a/utils/utils.py b/utils/utils.py index 3722df0b..3c8dbbeb 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -416,7 +416,9 @@ def try_command(command): return False -def scan(command, env=None, allowed_return_codes=[]): +def scan(command, env=None, allowed_return_codes=None): + if allowed_return_codes is None: + allowed_return_codes = [] try: response = subprocess.check_output( command,