Skip to content

Commit

Permalink
Store to sqlite
Browse files — browse the repository at this point in the history
Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu committed Mar 16, 2024
1 parent b52731c commit 4337b97
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 58 deletions.
74 changes: 36 additions & 38 deletions vdb/lib/aqua.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,18 @@
DOWNLOAD_CHUNK_SIZE = 4096




class AquaSource(NvdSource):
"""Aqua CVE source"""

def download_all(self, local_store=True):
def download_all(self):
"""Download all cve data"""
# For performance do not retain the whole data in-memory
# See: https://github.com/AppThreat/vulnerability-db/issues/27
data_list = []
self.fetch(config.VULN_LIST_URL)
return data_list

def download_recent(self, local_store=True):
def download_recent(self):
pass

def _process_zip(self, zname):
Expand Down Expand Up @@ -96,16 +94,16 @@ def convert(self, cve_data):
@staticmethod
def is_supported_source(zfname):
for distro in (
"alpine",
"cwe",
"ghsa",
"go",
"osv",
"redhat-cpe",
"kevc",
"oval",
"glad",
"mariner",
"alpine",
"cwe",
"ghsa",
"go",
"osv",
"redhat-cpe",
"kevc",
"oval",
"glad",
"mariner",
):
if distro in zfname:
return False
Expand Down Expand Up @@ -335,8 +333,8 @@ def ubuntu_to_vuln(cve_data):
# DNE - does not exist
# needs-triage - Vulnerability is not confirmed.
if status_obj.get("Status") in (
"DNE",
"needs-triage",
"DNE",
"needs-triage",
):
continue
version_start_including = ""
Expand All @@ -352,25 +350,25 @@ def ubuntu_to_vuln(cve_data):
fix_note = fix_note.split(":")[-1]
# Released CVEs have fixes
if (
status_obj.get("Status")
in (
status_obj.get("Status")
in (
"not-affected",
"released",
)
and " " not in fix_note
and "CVE" not in fix_note
)
and " " not in fix_note
and "CVE" not in fix_note
):
fix_version_start_including = fix_note
version_end_excluding = fix_note
# Handle CVEs that are deferred
# Let's include the vulnerabilities that did not get a fix
if (
status_obj.get("Status")
in (
status_obj.get("Status")
in (
"deferred",
"needed",
)
and " " not in fix_note
)
and " " not in fix_note
):
version_end_including = config.placeholder_fix_version
if status_obj.get("Status") in ("ignored",):
Expand Down Expand Up @@ -772,16 +770,16 @@ def debian_to_vuln(cve_data):
ann
for ann in annotations
if ann.get("Release")
and (ann.get("Version") or ann.get("Kind") == "not-affected")
and (ann.get("Version") or ann.get("Kind") == "not-affected")
]
for ann in annotations:
if ann.get("Type") == "RESERVED" or ann.get("Original") == "RESERVED":
continue
# Try to dealias
if (
not cve_id.startswith("CVE")
and ann.get("Type") == "xref"
and ann.get("Bugs")
not cve_id.startswith("CVE")
and ann.get("Type") == "xref"
and ann.get("Bugs")
):
aliases_block = """
## Related CVE(s)
Expand All @@ -796,13 +794,13 @@ def debian_to_vuln(cve_data):
break
kind = ann.get("Kind")
if kind not in (
"fixed",
"unfixed",
"no-dsa",
"end-of-life",
"ignored",
"not-affected",
"postponed",
"fixed",
"unfixed",
"no-dsa",
"end-of-life",
"ignored",
"not-affected",
"postponed",
):
continue
pkg_name = ann.get("Package")
Expand All @@ -824,8 +822,8 @@ def debian_to_vuln(cve_data):
severity = "MEDIUM"
# Try harder to set LOW priority
if kind == "postponed" or (
ann.get("Description")
and "minor issue" in ann.get("Description").lower()
ann.get("Description")
and "minor issue" in ann.get("Description").lower()
):
severity = "LOW"
if ann.get("Severity"):
Expand Down
5 changes: 3 additions & 2 deletions vdb/lib/cve.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pickle
import uuid

import orjson
Expand Down Expand Up @@ -358,11 +359,11 @@ def store5(self, data: list[CVE]):
exclude_unset=True,
exclude_none=True)
self.db_conn.execute(
"INSERT INTO cve_data values(?, ?, ?, ?, json(?), ?);", (
"INSERT INTO cve_data values(?, ?, ?, ?, ?, ?);", (
cve_id.model_dump(mode="python"), affected.vendor,
affected.product,
affected.packageName,
orjson.dumps(source_data).decode("utf-8", "ignore"),
pickle.dumps(source_data),
None))
cleaned_versions = [v.model_dump(mode="json", exclude_none=True) for v in versions]
self.index_conn.execute(
Expand Down
15 changes: 11 additions & 4 deletions vdb/lib/db6.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def ensure_schemas(db_conn: sqlite3.Connection, index_conn: sqlite3.Connection):
"""Create the sqlite tables and indexes in case they don't exist"""
db_conn.execute(
"CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data JSON NOT NULL, override_data JSON);")
"CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data BLOB NOT NULL, override_data BLOB);")
db_conn.execute(
"CREATE INDEX if not exists idx1 on cve_data(cve_id, type);")
index_conn.execute(
Expand All @@ -26,13 +26,20 @@ def ensure_schemas(db_conn: sqlite3.Connection, index_conn: sqlite3.Connection):
"CREATE INDEX if not exists cidx3 on cve_index(namespace, name);")


def get(db_file: str = config.VDB_BIN_FILE, index_file: str = config.VDB_BIN_INDEX) -> (
def get(db_file: str = config.VDB_BIN_FILE, index_file: str = config.VDB_BIN_INDEX, read_only=False) -> (
sqlite3.Connection, sqlite3.Connection):
global db_conn, index_conn, tables_created
if not db_file.startswith("file:"):
db_file = f"file:{db_file}"
if not index_file.startswith("file:"):
index_file = f"file:{index_file}"
if read_only:
db_file = f"{db_file}?mode=ro"
index_file = f"{index_file}?mode=ro"
if not db_conn:
db_conn = sqlite3.connect(db_file)
db_conn = sqlite3.connect(db_file, uri=True)
if not index_conn:
index_conn = sqlite3.connect(index_file)
index_conn = sqlite3.connect(index_file, uri=True)
if not tables_created:
ensure_schemas(db_conn, index_conn)
tables_created = True
Expand Down
9 changes: 4 additions & 5 deletions vdb/lib/gha.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,22 @@ def get_query(qtype="recent"):
class GitHubSource(NvdSource):
"""GitHub CVE source"""

def download_all(self, local_store=True):
def download_all(self):
"""Download all historic cve data"""
data_list = []
last_id = None
for y in range(0, int(config.GHA_PAGES_COUNT)):
data, page_info = self.fetch(vtype=last_id)
if data:
if local_store:
self.store(data)
self.store(data)
if page_info and page_info["hasNextPage"]:
last_id = page_info["endCursor"]
return data_list

def download_recent(self, local_store=True):
def download_recent(self):
"""Method which downloads the recent CVE"""
data, _ = self.fetch("recent")
if data and local_store:
if data:
self.store(data)
return data

Expand Down
8 changes: 4 additions & 4 deletions vdb/lib/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,20 @@ def fetch(self, payload):
json_data = r.json()
return self.convert(json_data)

def download_recent(self, local_store=True):
def download_recent(self):
"""Method which downloads the recent CVE"""
client = httpx.Client(http2=True, follow_redirects=True, timeout=180)
url = config.NPM_ADVISORIES_URL + "?perPage=100&page=1"
r = client.get(url=url)
if r.ok:
json_data = r.json()
data = self.convert(json_data.get("objects"))
if data and local_store:
if data:
self.store(data)
return data
return []

def download_all(self, local_store=True):
def download_all(self):
"""Download all historic cve data"""
data_list = []
client = httpx.Client(http2=True, follow_redirects=True, timeout=180)
Expand All @@ -90,7 +90,7 @@ def download_all(self, local_store=True):
if r.ok:
json_data = r.json()
data = self.convert(json_data.get("objects"))
if data and local_store:
if data:
self.store(data)
data_list += data
if json_data.get("urls", {}).get("next"):
Expand Down
8 changes: 3 additions & 5 deletions vdb/lib/osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,18 @@
class OSVSource(NvdSource):
"""OSV CVE source"""

def download_all(self, local_store=True):
def download_all(self):
"""Download all cve data"""
# For performance do not retain the whole data in-memory
# See: https://github.com/AppThreat/vulnerability-db/issues/27
data_list = []
for _, url in config.osv_url_dict.items():
data = self.fetch(url)
if not data:
continue
if local_store:
if data:
self.store(data)
return data_list

def download_recent(self, local_store=True):
def download_recent(self):
pass

def fetch(self, url):
Expand Down

0 comments on commit 4337b97

Please sign in to comment.