Skip to content

Commit

Permalink
Extend classify support to npm, maven, crates
Browse files Browse the repository at this point in the history
  • Loading branch information
msuozzo committed Feb 24, 2025
1 parent e60b4c8 commit 4ffa3f8
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 0 deletions.
62 changes: 62 additions & 0 deletions internal/netclassify/classify.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,23 @@ var (
pythonSourceRegex = regexp.MustCompile(`^(?P<package>[\w\.]+)-(?P<version>.+?)(?P<ext>\.(zip|tar\.gz|tar\.bz2|tar\.xz|tar\.Z|tar))$`)
)

// NPM
var (
	// npmFileRegex matches tarball downloads from the npm or Yarn registry,
	// e.g. https://registry.npmjs.org/express/-/express-4.17.1.tgz.
	// Capture groups: 1 = registry host, 2 = package path (including the
	// optional "@scope/" prefix), 3 = scope prefix, 4 and 5 = the tarball
	// name split at its last hyphen (group 5 cannot contain '-').
	npmFileRegex = regexp.MustCompile(`^https://registry\.(npmjs\.org|yarnpkg\.com)/((@[^/]+/)?[^/]+)/-/([^/]+)-([^/-]+)\.tgz$`)

	// npmAPIRegex matches two-component registry requests of the form
	// <package>/<segment> with an optional trailing slash, e.g.
	// https://registry.npmjs.org/express/4.17.1.
	npmAPIRegex = regexp.MustCompile(`^https://registry\.(npmjs\.org|yarnpkg\.com)/((@[^/]+/)?[^/]+)/([^/]+)/?$`)
)

// Maven
var (
	// mavenRegex matches artifact URLs served from Maven Central
	// (repo1.maven.org/maven2) and the Gradle plugin portal
	// (plugins.gradle.org/m2). Capture groups: 1 = repository root,
	// 2 = slash-separated group path, 3 = artifact, 4 = version,
	// 5 = file name.
	//
	// Every dot in the host names is escaped so that look-alike hosts such
	// as "pluginsXgradle.org" are not accepted.
	mavenRegex = regexp.MustCompile(`^https?://(repo1\.maven\.org/maven2|plugins\.gradle\.org/m2)/(.+)/([^/]+)/([^/]+)/([^/]+)$`)
)

// Crates (Rust)
var (
	// cratesFileRegex matches crate downloads, e.g.
	// https://crates.io/api/v1/crates/rand/0.7.2/download.
	// Capture groups: 1 = crate name, 2 = version.
	cratesFileRegex = regexp.MustCompile(`^https?://crates\.io/api/v1/crates/([^/]+)/([^/]+)/download$`)

	// cratesAPIRegex matches version-level crates.io API requests, with an
	// optional single trailing word segment (e.g. ".../rand/0.7.2" or
	// ".../rand/0.7.2/dependencies"). It also matches download URLs, so
	// cratesFileRegex has to be tried first.
	cratesAPIRegex = regexp.MustCompile(`^https?://crates\.io/api/v1/crates/([^/]+)/([^/]+)(?:/\w+)?$`)
)

// GCS
var (
// https://cloud.google.com/storage/docs/json_api
Expand Down Expand Up @@ -83,6 +100,16 @@ func ClassifyURL(rawURL string) (string, error) {
return classifyPyPIURL(rawURL)
} else if pypiAPIRegex.MatchString(rawURL) {
return "", ErrSkipped
} else if npmFileRegex.MatchString(rawURL) {
return classifyNPMURL(rawURL)
} else if npmAPIRegex.MatchString(rawURL) {
return "", ErrSkipped
} else if cratesFileRegex.MatchString(rawURL) {
return classifyCratesURL(rawURL)
} else if cratesAPIRegex.MatchString(rawURL) {
return "", ErrSkipped
} else if mavenRegex.MatchString(rawURL) {
return classifyMavenURL(rawURL)
} else if gcsJSONRegex.MatchString(rawURL) {
return classifyGCSURL(rawURL, gcsJSONRegex)
} else if gcsXMLRegex.MatchString(rawURL) {
Expand Down Expand Up @@ -202,6 +229,41 @@ func classifyPyPIFile(fname string) (string, error) {
}
}

// classifyNPMURL converts an npm or Yarn registry tarball URL into a package
// URL of the form "pkg:npm/<package>@<version>", where <package> keeps its
// optional "@scope/" prefix.
//
// It returns an error when rawURL does not match npmFileRegex.
func classifyNPMURL(rawURL string) (string, error) {
	matches := npmFileRegex.FindStringSubmatch(rawURL)
	// npmFileRegex has five capture groups, so a successful match always
	// yields six entries; index 5 is read below.
	if len(matches) < 6 {
		return "", errors.New("invalid NPM download URL format")
	}
	packagePath := matches[2]
	version := matches[5]

	// The regex splits the tarball name at its last hyphen, which truncates
	// versions that themselves contain a hyphen (e.g. "1.0.0-beta.1" would
	// become "beta.1"). The tarball is named "<unscoped name>-<version>", so
	// strip the known package name from the reassembled stem instead. If the
	// stem does not start with the package name, keep the regex's split.
	name := packagePath[strings.LastIndex(packagePath, "/")+1:]
	stem := matches[4] + "-" + matches[5]
	if prefix := name + "-"; strings.HasPrefix(stem, prefix) {
		version = strings.TrimPrefix(stem, prefix)
	}
	return fmt.Sprintf("pkg:npm/%s@%s", packagePath, version), nil
}

// classifyCratesURL turns a crates.io download URL into a package URL of the
// form "pkg:cargo/<crate>@<version>". It returns an error when rawURL does
// not match cratesFileRegex.
func classifyCratesURL(rawURL string) (string, error) {
	groups := cratesFileRegex.FindStringSubmatch(rawURL)
	if len(groups) >= 3 {
		// groups[1] is the crate name, groups[2] the version.
		crate, ver := groups[1], groups[2]
		return fmt.Sprintf("pkg:cargo/%s@%s", crate, ver), nil
	}
	return "", errors.New("invalid Cargo URL format")
}

// classifyMavenURL converts a Maven repository artifact URL into a package
// URL of the form "pkg:maven/<groupId>/<artifact>@<version>". The groupId is
// the slash-separated group path from the URL joined with dots.
//
// It returns an error when rawURL does not match mavenRegex, when the group
// path contains an empty segment, or when the file name does not belong to
// the artifact named by the enclosing directory.
func classifyMavenURL(rawURL string) (string, error) {
	matches := mavenRegex.FindStringSubmatch(rawURL)
	if len(matches) < 6 {
		return "", errors.New("invalid Maven URL format")
	}
	groupPath, name, version, fileName := matches[2], matches[3], matches[4], matches[5]

	// A groupId may consist of a single segment (e.g. "junit"), so only
	// empty segments, which would produce "a..b", are rejected.
	pathSegments := strings.Split(groupPath, "/")
	for _, segment := range pathSegments {
		if segment == "" {
			return "", errors.New("invalid Maven path format")
		}
	}

	// Artifact files are named "<artifact>-<version>...". Other files that
	// happen to sit four levels deep (for example an artifact-level
	// maven-metadata.xml) would otherwise be reported with a directory name
	// standing in for the version.
	if !strings.HasPrefix(fileName, name+"-") {
		return "", errors.New("invalid Maven path format")
	}

	namespace := strings.Join(pathSegments, ".")
	return fmt.Sprintf("pkg:maven/%s/%s@%s", namespace, name, version), nil
}

func classifyGCSURL(rawURL string, pattern *regexp.Regexp) (string, error) {
matches := pattern.FindStringSubmatch(rawURL)
bucket, object := matches[pattern.SubexpIndex("bucket")], matches[pattern.SubexpIndex("object")]
Expand Down
71 changes: 71 additions & 0 deletions internal/netclassify/classify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,77 @@ func TestClassifyURL(t *testing.T) {
wantErr: ErrUnclassified,
},

// NPM test cases
{
name: "npm_download_simple",
url: "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
want: "pkg:npm/[email protected]",
},
{
name: "npm_download_scoped",
url: "https://registry.npmjs.org/@invisionag/eslint-config-ivx/-/eslint-config-ivx-0.0.2.tgz",
want: "pkg:npm/@invisionag/[email protected]",
},
{
name: "npm_yarn_download_simple",
url: "https://registry.yarnpkg.com/express/-/express-4.17.1.tgz",
want: "pkg:npm/[email protected]",
},
{
name: "npm_api_scoped",
url: "https://registry.npmjs.org/@esbuild/freebsd-arm64/0.21.5",
wantErr: ErrSkipped,
},
{
name: "npm_api_simple",
url: "https://registry.npmjs.org/express/4.17.1",
wantErr: ErrSkipped,
},
{
name: "npm_yarn_api_simple",
url: "https://registry.yarnpkg.com/express/4.17.1",
wantErr: ErrSkipped,
},

// Maven test cases
{
name: "maven_central_artifact",
url: "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar",
want: "pkg:maven/org.apache.commons/[email protected]",
},
{
name: "maven_with_classifier",
url: "https://repo1.maven.org/maven2/org/apache/spark/spark-core_2.12/3.1.2/spark-core_2.12-3.1.2-tests.jar",
want: "pkg:maven/org.apache.spark/[email protected]",
},
{
name: "maven_gradle_plugin_repo_artifact",
url: "https://plugins.gradle.org/m2/com/google/protobuf/com.google.protobuf.gradle.plugin/0.9.4/com.google.protobuf.gradle.plugin-0.9.4.pom",
want: "pkg:maven/com.google.protobuf/[email protected]",
},

// Crates (Rust) test cases
{
name: "crates_download",
url: "https://crates.io/api/v1/crates/rand/0.7.2/download",
want: "pkg:cargo/[email protected]",
},
{
name: "crates_api_package",
url: "https://crates.io/api/v1/crates/rand",
wantErr: ErrUnclassified,
},
{
name: "crates_api",
url: "https://crates.io/api/v1/crates/rand/0.7.2",
wantErr: ErrSkipped,
},
{
name: "crates_api_deps",
url: "https://crates.io/api/v1/crates/rand/0.7.2/dependencies",
wantErr: ErrSkipped,
},

// gcs URL tests
{
name: "valid GCS URL",
Expand Down

0 comments on commit 4ffa3f8

Please sign in to comment.