From 3d1903fbdd4ae859a7359350f9bf1963b6c317a7 Mon Sep 17 00:00:00 2001
From: Sanket Saurav <sanketsaurav@gmail.com>
Date: Wed, 7 Aug 2019 12:55:54 +0530
Subject: [PATCH] Fix some bug risks and code quality issues

Changes:
 - In `gatherers/gathererabc.py`, `utils/scan_utils.py` and `utils/utils.py`, fix dangerous default argument (mutables) from being passed in methods
 - In `scanners/sslyze.py`, fix an instance of unnecessary `len()` call being used to check for emptiness of an iterable
 - In `gatherers/censys.py`, `gatherers/rdns.py` and `utils/scan_utils.py`, make logging calls to lazily evaluate strings

Also added a .deepsource.toml configuration file to run Continuous Quality analysis on the repo.
---
 .deepsource.toml         | 19 +++++++++++++++++++
 gatherers/censys.py      |  2 +-
 gatherers/gathererabc.py |  4 +++-
 gatherers/rdns.py        |  2 +-
 scanners/sslyze.py       |  4 ++--
 utils/scan_utils.py      | 13 +++++++++----
 utils/utils.py           |  4 +++-
 7 files changed, 38 insertions(+), 10 deletions(-)
 create mode 100644 .deepsource.toml

diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 00000000..b7a669d8
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,19 @@
+version = 1
+
+test_patterns = [
+  'tests/**'
+]
+
+exclude_patterns = [
+  'docs/**',
+  'lambda/**'
+]
+
+[[analyzers]]
+  name = 'python'
+  enabled = true
+  runtime_version = '3.x.x'
+  
+  [analyzers.meta]
+    max_line_length = 100
+    skip_doc_coverage = ["module", "magic", "class"]
diff --git a/gatherers/censys.py b/gatherers/censys.py
index 205257dd..e60e0c8a 100644
--- a/gatherers/censys.py
+++ b/gatherers/censys.py
@@ -61,7 +61,7 @@ def gather(self):
 
         # Construct the query.
         query = query_for(self.suffixes)
-        logging.debug("Censys query:\n%s\n" % query)
+        logging.debug("Censys query:\n%s\n", query)
 
         # Plan to store in cache/censys/export.csv.
         download_path = utils.cache_path(
diff --git a/gatherers/gathererabc.py b/gatherers/gathererabc.py
index 47762627..fa0ac476 100644
--- a/gatherers/gathererabc.py
+++ b/gatherers/gathererabc.py
@@ -5,7 +5,9 @@
 
 class Gatherer(metaclass=ABCMeta):
 
-    def __init__(self, suffixes: List[str], options: dict, extra: dict={}):
+    def __init__(self, suffixes: List[str], options: dict, extra: dict = None):
+        if extra is None:
+            extra = {}
         self.suffixes = suffixes
         self.options = options
         self.extra = extra
diff --git a/gatherers/rdns.py b/gatherers/rdns.py
index 5cf17da9..00bdc38e 100644
--- a/gatherers/rdns.py
+++ b/gatherers/rdns.py
@@ -42,7 +42,7 @@ def gather(self):
             exit(1)
 
         with open(path) as lines:
-            logging.debug("\tReading %s..." % path)
+            logging.debug("\tReading %s...", path)
 
             for record in process_lines(lines, ip_filter, number_filter):
                 yield record
diff --git a/scanners/sslyze.py b/scanners/sslyze.py
index c2d1e573..25a06c04 100644
--- a/scanners/sslyze.py
+++ b/scanners/sslyze.py
@@ -353,7 +353,7 @@ def analyze_protocols_and_ciphers(data, sslv2, sslv3, tlsv1, tlsv1_1, tlsv1_2, t
     )
     data['ciphers'] = [cipher.name for cipher in accepted_ciphers]
 
-    if len(accepted_ciphers) > 0:
+    if accepted_ciphers:
         # Look at accepted cipher suites for RC4 or DHE.
         # This is imperfect, as the advertising of RC4 could discriminate based on client.
         # DHE and ECDHE may not remain the only forward secret options for TLS.
@@ -526,7 +526,7 @@ def analyze_certs(certs):
         if oid in apple_ev:
             browsers.append("Apple")
 
-        if len(browsers) > 0:
+        if browsers:
             data['certs']['ev']['trusted'] = True
 
             # Log each new OID we observe as marked for EV.
diff --git a/utils/scan_utils.py b/utils/scan_utils.py
index f441bdf7..6aa4c0d7 100644
--- a/utils/scan_utils.py
+++ b/utils/scan_utils.py
@@ -113,8 +113,10 @@ def format_last_exception():
 
 
 # Command Line Conveniences #
-def scan(command: List[str], env: dict=None,
-         allowed_return_codes: list=[]) -> Union[str, None]:
+def scan(command: List[str], env: dict = None,
+         allowed_return_codes: list = None) -> Union[str, None]:
+    if allowed_return_codes is None:
+        allowed_return_codes = []
     try:
         response = subprocess.check_output(
             command,
@@ -180,7 +182,7 @@ def configure_logging(options: Union[dict, None]=None) -> None:
 # This loads the whole thing into memory: it's not a great solution for
 # super-large lists of domains.
 def sort_csv(input_filename):
-    logging.warning("Sorting %s..." % input_filename)
+    logging.warning("Sorting %s...", input_filename)
 
     input_file = open(input_filename, encoding='utf-8', newline='')
     tmp_filename = "%s.tmp" % input_filename
@@ -221,7 +223,10 @@ def sort_csv(input_filename):
     shutil.move(tmp_filename, input_filename)
 
 
-def write_rows(rows, domain, base_domain, scanner, csv_writer, meta={}):
+def write_rows(rows, domain, base_domain, scanner, csv_writer, meta=None):
+
+    if meta is None:
+        meta = {}
 
     # If we didn't get any info, we'll still output information about why the scan failed.
     if rows is None:
diff --git a/utils/utils.py b/utils/utils.py
index 3722df0b..3c8dbbeb 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -416,7 +416,9 @@ def try_command(command):
         return False
 
 
-def scan(command, env=None, allowed_return_codes=[]):
+def scan(command, env=None, allowed_return_codes=None):
+    if allowed_return_codes is None:
+        allowed_return_codes = []
     try:
         response = subprocess.check_output(
             command,