Skip to content

Commit

Permalink
Tweak the dl_docs.py script
Browse files Browse the repository at this point in the history
This also adds a simple shell script, some docs, and a shell.nix

Signed-off-by: Michael Lieberman <[email protected]>
  • Loading branch information
mlieberman85 committed Oct 21, 2022
1 parent e6e0271 commit eadecc8
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 25 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ These folders contain some sample data for ingestion into GUAC

- some-sboms: tiny dataset of a handful of `gcr.io` containers
- top-dh-sboms: dataset of top 100+ dockerhub containers

## Usage

This repository is intended purely for getting set up with some test data; it
is not intended for use outside of development and demos. Do not expect much
documentation for a while.
80 changes: 55 additions & 25 deletions dl_docs.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import subprocess
import argparse
from os import path, mkdir

# Root directory that all generated document files are written under.
BASE_PATH = "docs/"

scorecards_data= {
"github.com/kubernetes/kubernetes":
scorecards_data = {
"github.com/kubernetes/kubernetes":
[
"5835544ca568b757a8ecae5c153f317e5736700e",
"b39bf148cd654599a52e867485c02c4f9d28b312",
Expand Down Expand Up @@ -45,57 +46,70 @@

k8s_slsa_data = k8s_versions

# Container images to generate SPDX SBOMs for: each image path maps to the
# list of tags (the k8s release versions) that should be scanned.
containers_data = {
    "k8s.gcr.io/kube-proxy": k8s_versions,
    "k8s.gcr.io/kube-controller-manager": k8s_versions,
    "k8s.gcr.io/kube-apiserver": k8s_versions,
    "k8s.gcr.io/kube-scheduler": k8s_versions,
}


def scorecard_cmd(repo, commit, fdir):
    """Run the OpenSSF scorecard CLI for one (repo, commit) pair.

    Writes JSON output to fdir/scorecard-<repo basename>-<commit>.json.
    Assumes the `scorecard` binary is on PATH — TODO confirm in the dev env.
    """
    fpath = path.join(
        fdir, 'scorecard-{}-{}.json'.format(repo.split('/')[-1], commit))
    # Argument list with shell=False: no shell injection via repo/commit.
    cmd = ["scorecard",
           "--repo={}".format(repo), "--commit={}".format(commit), "--format=json"]
    print_msg(fpath, ' '.join(cmd))

    # Context manager ensures the output file is closed even if the
    # subprocess call raises.
    with open(fpath, 'w') as f:
        subprocess.run(cmd, stdout=f)


def kube_slsa_cmd(version, fdir):
    """Download the SLSA provenance for one k8s release, pretty-printed by jq.

    Pipes `curl -sL https://dl.k8s.io/release/<version>/provenance.json`
    through `jq` and writes the result to fdir/kube-slsa-<version>.json.
    """
    fpath = path.join(fdir, 'kube-slsa-{}.json'.format(version))
    cmd1 = ["curl", "-s", "-L",
            "https://dl.k8s.io/release/{}/provenance.json".format(version)]
    cmd2 = ["jq"]
    print_msg(fpath, [cmd1, cmd2])
    with open(fpath, 'w') as f:
        p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
        subprocess.run(cmd2, stdin=p1.stdout, stdout=f)
        # Close our copy of the pipe so curl sees SIGPIPE if jq exits early,
        # then reap curl so no zombie process is left behind.
        p1.stdout.close()
        p1.wait()


def syft_spdx_cmd(container_path, tag, fdir):
    """Generate an SPDX SBOM for container_path:tag using syft.

    Pipes syft's spdx-json output through `jq` for pretty-printing and writes
    it to fdir/syft-spdx-<path with / replaced by ->:<tag>.json.
    """
    fpath = path.join(
        fdir, 'syft-spdx-{}:{}.json'.format(container_path.replace('/', '-'), tag))
    cmd1 = ["syft", "-c", "config/syft.yaml", "packages",
            "{}:{}".format(container_path, tag), "-o", "spdx-json"]
    cmd2 = ["jq"]
    print_msg(fpath, [cmd1, cmd2])

    # Context manager ensures the output file is closed even on failure.
    with open(fpath, 'w') as f:
        p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
        subprocess.run(cmd2, stdin=p1.stdout, stdout=f)
        # Close our copy of the pipe so syft sees SIGPIPE if jq exits early,
        # then reap syft so no zombie process is left behind.
        p1.stdout.close()
        p1.wait()


def run_all():
    """Run every download step, creating the base output directory first."""
    if not path.isdir(BASE_PATH):
        mkdir(BASE_PATH)

    do_spdx()
    do_k8s_slsa()
    do_scorecards()


def do_scorecards():
    """Generate scorecard JSON files for every configured repo/commit pair."""
    subpath = path.join(BASE_PATH, "scorecard")
    if not path.isdir(subpath):
        mkdir(subpath)

    # Iterate items() directly rather than re-indexing the dict per repo.
    for repo, commits in scorecards_data.items():
        for commit in commits:
            scorecard_cmd(repo, commit, subpath)


def do_k8s_slsa():
subpath = path.join(BASE_PATH, "slsa")
Expand All @@ -116,8 +130,24 @@ def do_spdx():
syft_spdx_cmd(container_path, tag, subpath)



# Maps CLI subcommand names (see the argparse setup under __main__) to the
# function implementing that download step.
commands = {
    "k8s_slsa": do_k8s_slsa,
    "scorecards": do_scorecards,
    "spdx": do_spdx,
    "all": run_all
}

def print_msg(path, cmd):
    """Announce the output file being created and the command producing it."""
    print(f"creating file: {path}, cmd: {cmd}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Download sample docs (SBOMs, SLSA provenance, scorecards).")
    # Kept for backwards compatibility: --all is equivalent to the "all"
    # subcommand. Previously this flag was parsed but never consulted.
    parser.add_argument("--all", action="store_true",
                        help="run every download step (same as 'all')")
    subparser = parser.add_subparsers(dest="command")
    for k in commands.keys():
        subparser.add_parser(k)
    args = parser.parse_args()

    if args.all and args.command is None:
        args.command = "all"
    if args.command is None:
        # No subcommand given: exit with usage instead of KeyError: None.
        parser.error(
            "a subcommand is required (one of: {})".format(", ".join(commands)))

    commands[args.command]()
26 changes: 26 additions & 0 deletions gen_sboms.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -e

# NOTE: This is going away in favor of dl_docs.py.
# Generates SPDX SBOMs with syft for every image listed under a GCR registry
# catalog and uploads them to a GCS bucket.
# Requires: syft, jq, crane, curl, docker, and a logged-in gcloud CLI.
#
# $1: registry catalog URL (default: the google-containers GCR v2 endpoint)
# $2: destination GCS bucket
REGISTRY_URL=${1-"https://gcr.io/v2/google-containers"}
BUCKET_URL=${2-"gs://oopsallsboms"}
SYFT_VERSION=$(syft version -o json | jq -r .version)
TOKEN=$(gcloud auth print-access-token)
GOOGLE_CONTAINERS=$(curl -H "Authorization: Bearer ${TOKEN}" "${REGISTRY_URL}/tags/list" | jq -r ".child[]")

# GOOGLE_CONTAINERS is intentionally unquoted: word splitting turns the
# newline-separated jq output into loop items.
for image in ${GOOGLE_CONTAINERS}
do
    # NOTE(review): this hardcodes gcr.io/google-containers while the crane/
    # syft calls below build refs from REGISTRY_URL (which includes the
    # https:// scheme and /v2/ API path) — confirm which form is intended.
    TAGS=$(crane ls "gcr.io/google-containers/${image}")
    for tag in ${TAGS}
    do
        # Make the digest filename-safe by replacing the first ':' with '-'.
        DIGEST=$(crane digest "${REGISTRY_URL}/${image}:${tag}" | sed 's/:/-/')
        FILENAME="outputs/${image}-${tag}.${DIGEST}.syft.${SYFT_VERSION}.spdx.json"
        echo "${FILENAME}"
        syft "${REGISTRY_URL}/${image}:${tag}" -o "spdx-json=${FILENAME}"
        gcloud alpha storage cp "${FILENAME}" "${BUCKET_URL}"
        # Prune pulled images so local disk doesn't fill while iterating.
        docker image prune -a -f
    done
done
10 changes: 10 additions & 0 deletions shell.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Development shell for generating and uploading the sample SBOM/doc data.
{ pkgs ? import <nixpkgs> {}}:
with pkgs;
mkShell {
buildInputs = [
google-cloud-sdk  # gcloud CLI: auth token + bucket upload in gen_sboms.sh
crane  # registry tag/digest listing in gen_sboms.sh
python310Packages.autopep8  # formatting for dl_docs.py
scorecard  # OpenSSF scorecard CLI invoked by dl_docs.py
# NOTE(review): the scripts also invoke syft, jq, curl, and docker — confirm
# whether those should be added here or are expected on the host.
];
}

0 comments on commit eadecc8

Please sign in to comment.