diff --git a/cmd/bashbrew/docker.go b/cmd/bashbrew/docker.go index 8dd91552..bcd0f277 100644 --- a/cmd/bashbrew/docker.go +++ b/cmd/bashbrew/docker.go @@ -1,7 +1,6 @@ package main import ( - "bufio" "bytes" "crypto/sha256" "encoding/hex" @@ -10,21 +9,13 @@ import ( "os" "os/exec" "path" - "strconv" "strings" "github.com/docker-library/bashbrew/manifest" + "github.com/docker-library/bashbrew/pkg/dockerfile" "github.com/urfave/cli" ) -type dockerfileMetadata struct { - StageFroms []string // every image "FROM" instruction value (or the parent stage's FROM value in the case of a named stage) - StageNames []string // the name of any named stage (in order) - StageNameFroms map[string]string // map of stage names to FROM values (or the parent stage's FROM value in the case of a named stage), useful for resolving stage names to FROM values - - Froms []string // every "FROM" or "COPY --from=xxx" value (minus named and/or numbered stages in the case of "--from=") -} - // this returns the "FROM" value for the last stage (which essentially determines the "base" for the final published image) func (r Repo) ArchLastStageFrom(arch string, entry *manifest.Manifest2822Entry) (string, error) { dockerfileMeta, err := r.archDockerfileMetadata(arch, entry) @@ -46,15 +37,15 @@ func (r Repo) ArchDockerFroms(arch string, entry *manifest.Manifest2822Entry) ([ return dockerfileMeta.Froms, nil } -func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) { +func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (*dockerfile.Metadata, error) { return r.archDockerfileMetadata(arch, entry) } -var dockerfileMetadataCache = map[string]*dockerfileMetadata{} +var dockerfileMetadataCache = map[string]*dockerfile.Metadata{} -func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) { +func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (*dockerfile.Metadata, error) { if builder := entry.ArchBuilder(arch); builder == "oci-import" { - return &dockerfileMetadata{ + return &dockerfile.Metadata{ StageFroms: []string{ "scratch", }, @@ -79,12 +70,12 @@ func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822En return meta, nil } - dockerfile, err := gitShow(commit, dockerfileFile) + df, err := gitShow(commit, dockerfileFile) if err != nil { return nil, cli.NewMultiError(fmt.Errorf(`failed "git show" for %q from commit %q`, dockerfileFile, commit), err) } - meta, err := parseDockerfileMetadata(dockerfile) + meta, err := dockerfile.Parse(df) if err != nil { return nil, cli.NewMultiError(fmt.Errorf(`failed parsing Dockerfile metadata for %q from commit %q`, dockerfileFile, commit), err) } @@ -93,102 +84,6 @@ func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822En return meta, nil } -func parseDockerfileMetadata(dockerfile string) (*dockerfileMetadata, error) { - meta := &dockerfileMetadata{ - // panic: assignment to entry in nil map - StageNameFroms: map[string]string{}, - // (nil slices work fine) - } - - scanner := bufio.NewScanner(strings.NewReader(dockerfile)) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - if line == "" { - // ignore blank lines - continue - } - - if line[0] == '#' { - // TODO handle "escape" parser directive - // TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing) - // ignore comments - continue - } - - // handle line continuations - // (TODO see note above regarding "escape" parser directive) - for line[len(line)-1] == '\\' && scanner.Scan() { - nextLine := strings.TrimSpace(scanner.Text()) - if nextLine == "" || nextLine[0] == '#' { - // ignore blank lines and comments - continue - } - line = line[0:len(line)-1] + nextLine - } - - fields := strings.Fields(line) - if len(fields) < 1 { - // must be a much more complex empty line?? - continue - } - instruction := strings.ToUpper(fields[0]) - - // TODO balk at ARG / $ in from values - - switch instruction { - case "FROM": - from := fields[1] - - if stageFrom, ok := meta.StageNameFroms[from]; ok { - // if this is a valid stage name, we should resolve it back to the original FROM value of that previous stage (we don't care about inter-stage dependencies for the purposes of either tag dependency calculation or tag building -- just how many there are and what external things they require) - from = stageFrom - } - - // make sure to add ":latest" if it's implied - from = latestizeRepoTag(from) - - meta.StageFroms = append(meta.StageFroms, from) - meta.Froms = append(meta.Froms, from) - - if len(fields) == 4 && strings.ToUpper(fields[2]) == "AS" { - stageName := fields[3] - meta.StageNames = append(meta.StageNames, stageName) - meta.StageNameFroms[stageName] = from - } - case "COPY": - for _, arg := range fields[1:] { - if !strings.HasPrefix(arg, "--") { - // doesn't appear to be a "flag"; time to bail! - break - } - if !strings.HasPrefix(arg, "--from=") { - // ignore any flags we're not interested in - continue - } - from := arg[len("--from="):] - - if stageFrom, ok := meta.StageNameFroms[from]; ok { - // see note above regarding stage names in FROM - from = stageFrom - } else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber < len(meta.StageFroms) { - // must be a stage number, we should resolve it too - from = meta.StageFroms[stageNumber] - } - - // make sure to add ":latest" if it's implied - from = latestizeRepoTag(from) - - meta.Froms = append(meta.Froms, from) - } - } - } - if err := scanner.Err(); err != nil { - return nil, err - } - return meta, nil -} - func (r Repo) DockerCacheName(entry *manifest.Manifest2822Entry) (string, error) { cacheHash, err := r.dockerCacheHash(entry) if err != nil { diff --git a/cmd/bashbrew/repo.go b/cmd/bashbrew/repo.go index 9005b891..3b063d41 100644 --- a/cmd/bashbrew/repo.go +++ b/cmd/bashbrew/repo.go @@ -6,7 +6,6 @@ import ( "path" "path/filepath" "sort" - "strings" "github.com/docker-library/bashbrew/manifest" ) @@ -39,13 +38,6 @@ func repos(all bool, args ...string) ([]string, error) { return ret, nil } -func latestizeRepoTag(repoTag string) string { - if repoTag != "scratch" && strings.IndexRune(repoTag, ':') < 0 { - return repoTag + ":latest" - } - return repoTag -} - type Repo struct { RepoName string TagName string diff --git a/pkg/dockerfile/parse.go b/pkg/dockerfile/parse.go new file mode 100644 index 00000000..66ff0cfd --- /dev/null +++ b/pkg/dockerfile/parse.go @@ -0,0 +1,124 @@ +package dockerfile + +import ( + "bufio" + "io" + "strconv" + "strings" +) + +type Metadata struct { + StageFroms []string // every image "FROM" instruction value (or the parent stage's FROM value in the case of a named stage) + StageNames []string // the name of any named stage (in order) + StageNameFroms map[string]string // map of stage names to FROM values (or the parent stage's FROM value in the case of a named stage), useful for resolving stage names to FROM values + + Froms []string // every "FROM" or "COPY --from=xxx" value (minus named and/or numbered stages in the case of "--from=") +} + +func Parse(dockerfile string) (*Metadata, error) { + return ParseReader(strings.NewReader(dockerfile)) +} + +func ParseReader(dockerfile io.Reader) (*Metadata, error) { + meta := &Metadata{ + // panic: assignment to entry in nil map + StageNameFroms: map[string]string{}, + // (nil slices work fine) + } + + scanner := bufio.NewScanner(dockerfile) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if line == "" { + // ignore blank lines + continue + } + + if line[0] == '#' { + // TODO handle "escape" parser directive + // TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing) + // ignore comments + continue + } + + // handle line continuations + // (TODO see note above regarding "escape" parser directive) + for line[len(line)-1] == '\\' && scanner.Scan() { + nextLine := strings.TrimSpace(scanner.Text()) + if nextLine == "" || nextLine[0] == '#' { + // ignore blank lines and comments + continue + } + line = line[0:len(line)-1] + nextLine + } + + fields := strings.Fields(line) + if len(fields) < 1 { + // must be a much more complex empty line?? + continue + } + instruction := strings.ToUpper(fields[0]) + + // TODO balk at ARG / $ in from values + + switch instruction { + case "FROM": + from := fields[1] + + if stageFrom, ok := meta.StageNameFroms[from]; ok { + // if this is a valid stage name, we should resolve it back to the original FROM value of that previous stage (we don't care about inter-stage dependencies for the purposes of either tag dependency calculation or tag building -- just how many there are and what external things they require) + from = stageFrom + } + + // make sure to add ":latest" if it's implied + from = latestizeRepoTag(from) + + meta.StageFroms = append(meta.StageFroms, from) + meta.Froms = append(meta.Froms, from) + + if len(fields) == 4 && strings.ToUpper(fields[2]) == "AS" { + stageName := fields[3] + meta.StageNames = append(meta.StageNames, stageName) + meta.StageNameFroms[stageName] = from + } + + case "COPY": + for _, arg := range fields[1:] { + if !strings.HasPrefix(arg, "--") { + // doesn't appear to be a "flag"; time to bail! + break + } + if !strings.HasPrefix(arg, "--from=") { + // ignore any flags we're not interested in + continue + } + from := arg[len("--from="):] + + if stageFrom, ok := meta.StageNameFroms[from]; ok { + // see note above regarding stage names in FROM + from = stageFrom + } else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber < len(meta.StageFroms) { + // must be a stage number, we should resolve it too + from = meta.StageFroms[stageNumber] + } + + // make sure to add ":latest" if it's implied + from = latestizeRepoTag(from) + + meta.Froms = append(meta.Froms, from) + } + } + } + if err := scanner.Err(); err != nil { + return nil, err + } + return meta, nil +} + +func latestizeRepoTag(repoTag string) string { + if repoTag != "scratch" && strings.IndexRune(repoTag, ':') < 0 { + return repoTag + ":latest" + } + return repoTag +} diff --git a/pkg/dockerfile/parse_test.go b/pkg/dockerfile/parse_test.go new file mode 100644 index 00000000..f650b4ed --- /dev/null +++ b/pkg/dockerfile/parse_test.go @@ -0,0 +1,131 @@ +package dockerfile_test + +import ( + "reflect" + "testing" + + "github.com/docker-library/bashbrew/pkg/dockerfile" +) + +func TestParse(t *testing.T) { + for _, td := range []struct { + name string + dockerfile string + metadata dockerfile.Metadata + }{ + { + dockerfile: `FROM scratch`, + metadata: dockerfile.Metadata{ + Froms: []string{"scratch"}, + }, + }, + { + dockerfile: `from bash`, + metadata: dockerfile.Metadata{ + Froms: []string{"bash:latest"}, + }, + }, + { + dockerfile: `fRoM bash:5`, + metadata: dockerfile.Metadata{ + Froms: []string{"bash:5"}, + }, + }, + { + name: "comments+whitespace+continuation", + dockerfile: ` + FROM scratch + + # foo + + # bar + + FROM bash + RUN echo \ + # comment inside continuation + hello \ + world + `, + metadata: dockerfile.Metadata{ + Froms: []string{"scratch", "bash:latest"}, + }, + }, + { + name: "multi-stage", + dockerfile: ` + FROM bash:latest AS foo + FROM busybox:uclibc + # intermediate stage without name + FROM bash:5 AS bar + FROM foo AS foo2 + FROM scratch + COPY --from=foo / / + COPY --from=bar / / + COPY --from=foo2 / / + COPY --chown=1234:5678 /foo /bar + `, + metadata: dockerfile.Metadata{ + StageFroms: []string{"bash:latest", "busybox:uclibc", "bash:5", "bash:latest", "scratch"}, + StageNames: []string{"foo", "bar", "foo2"}, + StageNameFroms: map[string]string{ + "foo": "bash:latest", + "bar": "bash:5", + "foo2": "bash:latest", + }, + Froms: []string{"bash:latest", "busybox:uclibc", "bash:5", "bash:latest", "scratch", "bash:latest", "bash:5", "bash:latest"}, + }, + }, + { + name: "RUN --mount", + dockerfile: ` + FROM scratch + RUN --mount=type=bind,from=busybox:uclibc,target=/tmp ["/tmp/bin/sh","-euxc","echo foo > /foo"] + `, + metadata: dockerfile.Metadata{ + StageFroms: []string{"scratch"}, + Froms: []string{"scratch"}, // TODO this should include "busybox:uclibc" + }, + }, + { + name: "RUN --mount=stage", + dockerfile: ` + FROM busybox:uclibc AS bb + RUN --network=none echo hi, a flag that is ignored + FROM scratch + RUN --mount=type=bind,from=bb,target=/tmp ["/tmp/bin/sh","-euxc","echo foo > /foo"] + `, + metadata: dockerfile.Metadata{ + StageFroms: []string{"busybox:uclibc", "scratch"}, + StageNames: []string{"bb"}, + StageNameFroms: map[string]string{"bb": "busybox:uclibc"}, + Froms: []string{"busybox:uclibc", "scratch"}, // TODO this should end with "busybox:uclibc" + }, + }, + } { + td := td + // some light normalization + if td.name == "" { + td.name = td.dockerfile + } + if len(td.metadata.Froms) > 0 && len(td.metadata.StageFroms) == 0 { + td.metadata.StageFroms = td.metadata.Froms + } + if td.metadata.StageNameFroms == nil { + td.metadata.StageNameFroms = map[string]string{} + } + t.Run(td.name, func(t *testing.T) { + parsed, err := dockerfile.Parse(td.dockerfile) + if err != nil { + t.Fatal(err) + } + + if parsed == nil { + t.Fatalf("expected:\n%#v\ngot:\n%#v", td.metadata, parsed) + } + + if !reflect.DeepEqual(*parsed, td.metadata) { + t.Fatalf("expected:\n%#v\ngot:\n%#v", td.metadata, *parsed) + } + }) + } +}