diff --git a/vdb/lib/cve.py b/vdb/lib/cve.py
index 1011512..6f3cb71 100644
--- a/vdb/lib/cve.py
+++ b/vdb/lib/cve.py
@@ -172,7 +172,7 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
     # Similar to purl type
     vendor = parts.group("vendor")
     # Similar to purl namespace
-    product = parts.group("package")
+    product = parts.group("package").removesuffix("\\").removesuffix("!")
     # Similar to purl name
     package_name = parts.group("package")
     if "/" in product:
diff --git a/vdb/lib/nvd.py b/vdb/lib/nvd.py
index f019138..08058f8 100644
--- a/vdb/lib/nvd.py
+++ b/vdb/lib/nvd.py
@@ -89,7 +89,8 @@ def filterable_git_url(url: str, hostname: str) -> bool:
         "git-scm.com",
         "/blog",
         "/news",
-        "/support/"
+        "/support/",
+        "/bug_report"
     ):
         if part in url.lower():
             return True
@@ -104,14 +105,14 @@ def get_alt_cpes(cpe_uri, git_urls):
     for agit_url in git_urls:
         url_obj = urlparse(agit_url)
         # Ignore obvious filterable urls
-        if filterable_git_url(agit_url, url_obj.hostname):
+        if filterable_git_url(agit_url, url_obj.hostname) and not url_obj.path and not url_obj.query:
             continue
         git_repo_name = url_obj.hostname
         if url_obj.path:
             paths = [
                 p
                 for p in url_obj.path.split("/")
-                if p not in ("/", "pub", "scm", "cgi-bin", "cgit", "gitweb")
+                if p and p not in ("/", "pub", "scm", "cgi-bin", "cgit", "gitweb")
             ]
             if paths:
                 max_path = 3 if len(paths) >= 2 else 2
@@ -132,7 +133,20 @@ def get_alt_cpes(cpe_uri, git_urls):
                     .removesuffix(".git")
                 )
         if not parsed_git_repo_names.get(git_repo_name):
+            # Filter repo names without a path
+            # eg: github.com
+            # git_repo_name has no scheme, so it is prefixed with "//" to make
+            # urlparse treat the leading part as the host rather than the path.
+            repo_url_obj = urlparse(f"//{git_repo_name}")
+            if not repo_url_obj.path.strip("/"):
+                continue
             parsed_git_repo_names[git_repo_name] = True
+            # We only need 2 new aliases
+            # NOTE(review): assumes purl_proposal_cache maps to sets (it uses
+            # .add below); set.pop() takes no argument and evicts an arbitrary
+            # alias -- confirm that is acceptable.
+            if len(purl_proposal_cache.get(cpe_uri, [])) >= 2:
+                purl_proposal_cache[cpe_uri].pop()
             purl_proposal_cache[cpe_uri].add(
                 f"cpe:2.3:a:generic:{git_repo_name}:*:*:*:*:*:*:*:*"
             )