Skip to content

Commit

Permalink
replace sed with jq
Browse files Browse the repository at this point in the history
  • Loading branch information
sanchitram1 committed Oct 21, 2024
1 parent 824aeab commit e349474
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 63 deletions.
63 changes: 28 additions & 35 deletions package_managers/homebrew/jq/dependencies.jq
Original file line number Diff line number Diff line change
@@ -1,37 +1,30 @@
# build_dependencies
# dependencies
# test_dependencies
# optional_dependencies
# uses_from_macos
# TODO: variations (linux only, by architecture)
# all of the above are the fields that contain dependency info for Homebrew

# uses from macos sometimes specifies build / test -- right now logging that as macos only

# Flatten every formula into one {package, dependency_type, dependency} row per
# declared dependency. Rows are emitted per formula in this order:
# uses_from_macos, dependencies, test_dependencies, optional_dependencies,
# build_dependencies. A missing (null) field contributes no rows.

# Turn each element of the input array into a row for package $pkg of kind $kind.
def rows($pkg; $kind):
  map({package: $pkg, dependency_type: $kind, dependency: .});

[
  .[]
  | .name as $pkg
  | (
      # uses_from_macos entries can be objects rather than plain strings;
      # for those, keep only the first key as returned by `keys`.
      ((.uses_from_macos // [])
        | map(if type == "object" then keys[0] else . end)
        | rows($pkg; "uses_from_macos")),
      ((.dependencies // []) | rows($pkg; "dependency")),
      ((.test_dependencies // []) | rows($pkg; "test_dependency")),
      ((.optional_dependencies // []) | rows($pkg; "optional_dependency")),
      ((.build_dependencies // []) | rows($pkg; "build_dependency"))
    )
  # explode each per-kind array so every dependency becomes its own row
  | .[]
]
# Generate one `INSERT INTO dependencies` statement per (formula, dependency)
# pair and join them with newlines into a single SQL script.
#
# Requires these jq variables, which must be strings because they are
# concatenated into the statement text:
#   $build_deps_type_id, $runtime_deps_type_id, $recommended_deps_type_id,
#   $test_deps_type_id, $optional_deps_type_id, $uses_from_macos_type_id
#
# Per formula, statements are emitted in the order: build, runtime,
# recommended, test, optional, uses_from_macos.

# Double single quotes so the value is safe inside a SQL string literal;
# null becomes the empty string (matching what `"..." + null` produced before).
def sql_escape: (. // "") | split("'") | join("''");

[
  .[]
  | (.name | sql_escape) as $pkg
  # pair every dependency list with the type id its rows should carry
  | (
      [$build_deps_type_id, .build_dependencies],
      [$runtime_deps_type_id, .dependencies],
      [$recommended_deps_type_id, .recommended_dependencies],
      [$test_deps_type_id, .test_dependencies],
      [$optional_deps_type_id, .optional_dependencies],
      [$uses_from_macos_type_id, .uses_from_macos]
    )
  | .[0] as $type_id
  # explode the list so each dependency gets its own row; `// []` keeps a
  # missing field from aborting the whole run with "Cannot iterate over null"
  | (.[1] // [])[]
  # only plain string entries are handled; null and object entries are skipped
  # TODO: some uses_from_macos entries are JSON objects and are dropped here
  | select(type == "string")
  | "INSERT INTO dependencies (version_id, dependency_id, depends_on_type_id) VALUES (\n"
    + "(SELECT id FROM versions WHERE import_id = '" + $pkg + "'),\n"
    + "(SELECT id FROM packages WHERE name = '" + (. | sql_escape) + "'),\n"
    + "'" + $type_id + "');"
] | join("\n")
18 changes: 14 additions & 4 deletions package_managers/homebrew/jq/package_url.jq
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
# mapping package to urls is straightforward
# but, in the first normal form we've gotta do the mapping ourselves
# luckily, homebrew is small enough that we can push some of that work to the db

# Generate one `INSERT INTO package_urls` statement per (formula, url) pair and
# join them with newlines into a single SQL script. Each formula yields two
# statements: first its homepage, then its stable source URL.
#
# Requires the jq variables $homepage_url_type_id and $source_url_type_id,
# which must be strings because they are concatenated into the statement text.
# The package and url ids are resolved by the database through sub-selects.

# Double single quotes so the value is safe inside a SQL string literal;
# null becomes the empty string (matching what `"..." + null` produced before).
def sql_escape: (. // "") | split("'") | join("''");

[
  .[]
  | (.name | sql_escape) as $pkg
  # pair each URL with the url type id used to look it up
  | (
      [.homepage, $homepage_url_type_id],
      [.urls.stable.url, $source_url_type_id]
    )
  # for each url, generate an insert statement
  | "INSERT INTO package_urls (package_id, url_id) VALUES (\n"
    + "(SELECT id FROM packages WHERE name = '" + $pkg + "'),\n"
    + "(SELECT id FROM urls WHERE url = '" + (.[0] | sql_escape)
    + "' AND url_type_id = '" + .[1] + "'));"
] | join("\n")
15 changes: 13 additions & 2 deletions package_managers/homebrew/jq/packages.jq
Original file line number Diff line number Diff line change
@@ -1,2 +1,13 @@
# we just need the name for the packages models
# Project every formula onto the fields of the packages model. Only `name` is
# read from the input: derived_id is "homebrew/" + name, import_id repeats the
# name, and readme is always null.
map({
  name,
  derived_id: ("homebrew/" + .name),
  import_id: .name,
  readme: null
})

# Generate one `INSERT INTO packages` statement per formula and join them with
# newlines into a single SQL script.
#
# Requires the jq variable $package_manager_id, which must be a string because
# it is concatenated into the statement text.

# Double single quotes so the value is safe inside a SQL string literal;
# null becomes the empty string (matching what `"..." + null` produced before).
def sql_escape: (. // "") | split("'") | join("''");

[
  .[]
  # the name is the only key we need from Homebrew
  | (.name | sql_escape) as $name
  | "INSERT INTO packages (name, derived_id, import_id, package_manager_id) VALUES ('"
    + $name + "', '"
    # derived_id = package manager name + "/" + package name, which enforces
    # uniqueness on the packages table
    + "homebrew/" + $name + "', '"
    # import_id is the same as the package name (used for joins)
    + $name + "', '"
    + $package_manager_id + "');"
] | join("\n")
18 changes: 12 additions & 6 deletions package_managers/homebrew/jq/urls.jq
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
# homepage is at the main key
# source is inside stable, and it's the tarball
# from our sources.json, we're extracting homepage and source:
# homepage is at the main key
# source is inside stable, and it's the tarball

# for every single row, extract the homepage and source:
# Generate one `INSERT INTO urls` statement per URL and join them with newlines
# into a single SQL script. Each formula yields two statements: first its
# homepage (`.homepage`), then its source tarball (`.urls.stable.url`).
#
# Requires the jq variables $homepage_url_type_id and $source_url_type_id,
# which must be strings because they are concatenated into the statement text.

# Double single quotes so the value is safe inside a SQL string literal;
# null becomes the empty string (matching what `"..." + null` produced before).
def sql_escape: (. // "") | split("'") | join("''");

[
  .[]
  # pair each URL with the id of its url type
  | (
      [.homepage, $homepage_url_type_id],
      [.urls.stable.url, $source_url_type_id]
    )
  | "INSERT INTO urls (url, url_type_id) VALUES ('"
    + (.[0] | sql_escape) + "', '"
    + .[1] + "');"
] | join("\n")
13 changes: 9 additions & 4 deletions package_managers/homebrew/jq/versions.jq
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
# homebrew has the problem where there are no versions
# we're gonna assume the version available is the latest
# and we'll deal with that later

# TODO: `downloads: .analytics.install_on_request."365d".[$name]`
# above gives us the downloads for the last 365 days
# not available in the full JSON API

# TODO: there is also the problem of versioned formulae

# TODO: licenses is in source.json, but we need a long-term mapping solution

# Generate one `INSERT INTO versions` statement per formula and join them with
# newlines into a single SQL script. The stable version (`.versions.stable`) is
# the only version recorded for a formula.
#
# The formula name is stored as the version's import_id, which is the column
# the dependencies script uses to look versions up. package_id is resolved by
# the database from the packages row whose import_id is that same name, rather
# than being given the name itself.

# Double single quotes so the value is safe inside a SQL string literal;
# null becomes the empty string (matching what `"..." + null` produced before).
def sql_escape: (. // "") | split("'") | join("''");

[
  .[]
  | (.name | sql_escape) as $import_id
  | "INSERT INTO versions (version, import_id, package_id) VALUES ('"
    + (.versions.stable | sql_escape) + "', '"
    + $import_id + "', "
    + "(SELECT id FROM packages WHERE import_id = '" + $import_id + "'));"
] | join("\n")
2 changes: 0 additions & 2 deletions package_managers/homebrew/sed/dependencies.sed

This file was deleted.

2 changes: 0 additions & 2 deletions package_managers/homebrew/sed/package_url.sed

This file was deleted.

2 changes: 0 additions & 2 deletions package_managers/homebrew/sed/packages.sed

This file was deleted.

4 changes: 0 additions & 4 deletions package_managers/homebrew/sed/urls.sed

This file was deleted.

2 changes: 0 additions & 2 deletions package_managers/homebrew/sed/versions.sed

This file was deleted.

0 comments on commit e349474

Please sign in to comment.