diff --git a/vdb/lib/aqua.py b/vdb/lib/aqua.py
index f3daa06..e4dd462 100644
--- a/vdb/lib/aqua.py
+++ b/vdb/lib/aqua.py
@@ -23,12 +23,10 @@
 DOWNLOAD_CHUNK_SIZE = 4096
 
 
-
-
 class AquaSource(NvdSource):
     """Aqua CVE source"""
 
-    def download_all(self, local_store=True):
+    def download_all(self):
         """Download all cve data"""
         # For performance do not retain the whole data in-memory
         # See: https://github.com/AppThreat/vulnerability-db/issues/27
@@ -36,7 +34,7 @@ def download_all(self, local_store=True):
         self.fetch(config.VULN_LIST_URL)
         return data_list
 
-    def download_recent(self, local_store=True):
+    def download_recent(self):
         pass
 
     def _process_zip(self, zname):
@@ -96,16 +94,16 @@ def convert(self, cve_data):
     @staticmethod
     def is_supported_source(zfname):
         for distro in (
-                "alpine",
-                "cwe",
-                "ghsa",
-                "go",
-                "osv",
-                "redhat-cpe",
-                "kevc",
-                "oval",
-                "glad",
-                "mariner",
+            "alpine",
+            "cwe",
+            "ghsa",
+            "go",
+            "osv",
+            "redhat-cpe",
+            "kevc",
+            "oval",
+            "glad",
+            "mariner",
         ):
             if distro in zfname:
                 return False
@@ -335,8 +333,8 @@ def ubuntu_to_vuln(cve_data):
             # DNE - does not exist
             # needs-triage - Vulnerability is not confirmed.
             if status_obj.get("Status") in (
-                    "DNE",
-                    "needs-triage",
+                "DNE",
+                "needs-triage",
             ):
                 continue
             version_start_including = ""
@@ -352,25 +350,25 @@ def ubuntu_to_vuln(cve_data):
                 fix_note = fix_note.split(":")[-1]
             # Released CVEs have fixes
             if (
-                    status_obj.get("Status")
-                    in (
+                status_obj.get("Status")
+                in (
                     "not-affected",
                     "released",
-                    )
-                    and " " not in fix_note
-                    and "CVE" not in fix_note
+                )
+                and " " not in fix_note
+                and "CVE" not in fix_note
             ):
                 fix_version_start_including = fix_note
                 version_end_excluding = fix_note
             # Handle CVEs that are deferred
             # Let's include the vulnerabilities that did not get a fix
             if (
-                    status_obj.get("Status")
-                    in (
+                status_obj.get("Status")
+                in (
                     "deferred",
                     "needed",
-                    )
-                    and " " not in fix_note
+                )
+                and " " not in fix_note
             ):
                 version_end_including = config.placeholder_fix_version
             if status_obj.get("Status") in ("ignored",):
@@ -772,16 +770,16 @@ def debian_to_vuln(cve_data):
             ann
             for ann in annotations
             if ann.get("Release")
-                and (ann.get("Version") or ann.get("Kind") == "not-affected")
+            and (ann.get("Version") or ann.get("Kind") == "not-affected")
         ]
         for ann in annotations:
             if ann.get("Type") == "RESERVED" or ann.get("Original") == "RESERVED":
                 continue
             # Try to dealias
             if (
-                    not cve_id.startswith("CVE")
-                    and ann.get("Type") == "xref"
-                    and ann.get("Bugs")
+                not cve_id.startswith("CVE")
+                and ann.get("Type") == "xref"
+                and ann.get("Bugs")
             ):
                 aliases_block = """
 ## Related CVE(s)
@@ -796,13 +794,13 @@ def debian_to_vuln(cve_data):
                 break
             kind = ann.get("Kind")
             if kind not in (
-                    "fixed",
-                    "unfixed",
-                    "no-dsa",
-                    "end-of-life",
-                    "ignored",
-                    "not-affected",
-                    "postponed",
+                "fixed",
+                "unfixed",
+                "no-dsa",
+                "end-of-life",
+                "ignored",
+                "not-affected",
+                "postponed",
             ):
                 continue
             pkg_name = ann.get("Package")
@@ -824,8 +822,8 @@ def debian_to_vuln(cve_data):
             severity = "MEDIUM"
             # Try harder to set LOW priority
             if kind == "postponed" or (
-                    ann.get("Description")
-                    and "minor issue" in ann.get("Description").lower()
+                ann.get("Description")
+                and "minor issue" in ann.get("Description").lower()
             ):
                 severity = "LOW"
             if ann.get("Severity"):
diff --git a/vdb/lib/cve.py b/vdb/lib/cve.py
index e0d6926..3486e5d 100644
--- a/vdb/lib/cve.py
+++ b/vdb/lib/cve.py
@@ -1,4 +1,5 @@
 import os
+import pickle
 import uuid
 
 import orjson
@@ -358,11 +359,11 @@ def store5(self, data: list[CVE]):
                     exclude_unset=True, exclude_none=True)
                 self.db_conn.execute(
-                    "INSERT INTO cve_data values(?, ?, ?, ?, json(?), ?);", (
+                    "INSERT INTO cve_data values(?, ?, ?, ?, ?, ?);", (
                         cve_id.model_dump(mode="python"),
                         affected.vendor,
                         affected.product,
                         affected.packageName,
-                        orjson.dumps(source_data).decode("utf-8", "ignore"),
+                        pickle.dumps(source_data),
                         None))
                 cleaned_versions = [v.model_dump(mode="json", exclude_none=True) for v in versions]
                 self.index_conn.execute(
diff --git a/vdb/lib/db6.py b/vdb/lib/db6.py
index f196b22..e259d33 100644
--- a/vdb/lib/db6.py
+++ b/vdb/lib/db6.py
@@ -13,7 +13,7 @@
 def ensure_schemas(db_conn: sqlite3.Connection, index_conn: sqlite3.Connection):
     """Create the sqlite tables and indexes in case they don't exist"""
     db_conn.execute(
-        "CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data JSON NOT NULL, override_data JSON);")
+        "CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data BLOB NOT NULL, override_data BLOB);")
     db_conn.execute(
         "CREATE INDEX if not exists idx1 on cve_data(cve_id, type);")
     index_conn.execute(
@@ -26,13 +26,20 @@ def ensure_schemas(db_conn: sqlite3.Connection, index_conn: sqlite3.Connection):
         "CREATE INDEX if not exists cidx3 on cve_index(namespace, name);")
 
 
-def get(db_file: str = config.VDB_BIN_FILE, index_file: str = config.VDB_BIN_INDEX) -> (
+def get(db_file: str = config.VDB_BIN_FILE, index_file: str = config.VDB_BIN_INDEX, read_only=False) -> (
         sqlite3.Connection, sqlite3.Connection):
     global db_conn, index_conn, tables_created
+    if not db_file.startswith("file:"):
+        db_file = f"file:{db_file}"
+    if not index_file.startswith("file:"):
+        index_file = f"file:{index_file}"
+    if read_only:
+        db_file = f"{db_file}?mode=ro"
+        index_file = f"{index_file}?mode=ro"
     if not db_conn:
-        db_conn = sqlite3.connect(db_file)
+        db_conn = sqlite3.connect(db_file, uri=True)
     if not index_conn:
-        index_conn = sqlite3.connect(index_file)
+        index_conn = sqlite3.connect(index_file, uri=True)
     if not tables_created:
         ensure_schemas(db_conn, index_conn)
         tables_created = True
diff --git a/vdb/lib/gha.py b/vdb/lib/gha.py
index 0c57063..56a39af 100755
--- a/vdb/lib/gha.py
+++ b/vdb/lib/gha.py
@@ -85,23 +85,22 @@ def get_query(qtype="recent"):
 class GitHubSource(NvdSource):
     """GitHub CVE source"""
 
-    def download_all(self, local_store=True):
+    def download_all(self):
         """Download all historic cve data"""
         data_list = []
         last_id = None
         for y in range(0, int(config.GHA_PAGES_COUNT)):
             data, page_info = self.fetch(vtype=last_id)
             if data:
-                if local_store:
-                    self.store(data)
+                self.store(data)
             if page_info and page_info["hasNextPage"]:
                 last_id = page_info["endCursor"]
         return data_list
 
-    def download_recent(self, local_store=True):
+    def download_recent(self):
         """Method which downloads the recent CVE"""
         data, _ = self.fetch("recent")
-        if data and local_store:
+        if data:
             self.store(data)
         return data
 
diff --git a/vdb/lib/npm.py b/vdb/lib/npm.py
index daa65f5..6c7b310 100644
--- a/vdb/lib/npm.py
+++ b/vdb/lib/npm.py
@@ -67,7 +67,7 @@ def fetch(self, payload):
         json_data = r.json()
         return self.convert(json_data)
 
-    def download_recent(self, local_store=True):
+    def download_recent(self):
         """Method which downloads the recent CVE"""
         client = httpx.Client(http2=True, follow_redirects=True, timeout=180)
         url = config.NPM_ADVISORIES_URL + "?perPage=100&page=1"
@@ -75,12 +75,12 @@ def download_recent(self, local_store=True):
         if r.ok:
             json_data = r.json()
             data = self.convert(json_data.get("objects"))
-            if data and local_store:
+            if data:
                 self.store(data)
             return data
         return []
 
-    def download_all(self, local_store=True):
+    def download_all(self):
         """Download all historic cve data"""
         data_list = []
         client = httpx.Client(http2=True, follow_redirects=True, timeout=180)
@@ -90,7 +90,7 @@ def download_all(self, local_store=True):
             if r.ok:
                 json_data = r.json()
                 data = self.convert(json_data.get("objects"))
-                if data and local_store:
+                if data:
                     self.store(data)
                     data_list += data
                 if json_data.get("urls", {}).get("next"):
diff --git a/vdb/lib/osv.py b/vdb/lib/osv.py
index 3abbd9a..3a07a62 100644
--- a/vdb/lib/osv.py
+++ b/vdb/lib/osv.py
@@ -34,20 +34,18 @@
 class OSVSource(NvdSource):
     """OSV CVE source"""
 
-    def download_all(self, local_store=True):
+    def download_all(self):
         """Download all cve data"""
         # For performance do not retain the whole data in-memory
         # See: https://github.com/AppThreat/vulnerability-db/issues/27
         data_list = []
         for _, url in config.osv_url_dict.items():
             data = self.fetch(url)
-            if not data:
-                continue
-            if local_store:
+            if data:
                 self.store(data)
         return data_list
 
-    def download_recent(self, local_store=True):
+    def download_recent(self):
         pass
 
     def fetch(self, url):