Skip to content

Commit

Permalink
Separate out stabilization into configurable passes (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
msuozzo authored Oct 29, 2024
1 parent be0f21d commit 15e6dea
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 65 deletions.
1 change: 1 addition & 0 deletions internal/verifier/attestation.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ func CreateAttestations(ctx context.Context, input rebuild.Input, finalStrategy
// TODO: Include build repository associated with this builder.
}
publicRebuildURI := path.Join("rebuild", buildInfo.Target.Artifact)
// TODO: Change from "normalized" to "stabilized".
publicNormalizedURI := path.Join("normalized", buildInfo.Target.Artifact)
// Create comparison attestation.
eqStmt := &in_toto.ProvenanceStatementSLSA1{
Expand Down
7 changes: 5 additions & 2 deletions pkg/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ import (
"github.com/pkg/errors"
)

// AllStabilizers is the combined stabilizer list for all archive formats:
// the zip stabilizers followed by the tar stabilizers.
//
// The full slice expression caps the capacity of AllZipStabilizers at its
// length so that append always allocates a fresh backing array. Without the
// cap, any spare capacity in AllZipStabilizers would be overwritten with the
// tar stabilizers and the two package-level slices would share storage.
var AllStabilizers = append(AllZipStabilizers[:len(AllZipStabilizers):len(AllZipStabilizers)], AllTarStabilizers...)

// Stabilize selects and applies the stabilization routine for the given archive format.
func Stabilize(dst io.Writer, src io.Reader, f Format) error {
opts := StabilizeOpts{Stabilizers: AllStabilizers}
switch f {
case ZipFormat:
srcReader, size, err := toZipCompatibleReader(src)
Expand All @@ -37,7 +40,7 @@ func Stabilize(dst io.Writer, src io.Reader, f Format) error {
}
zw := zip.NewWriter(dst)
defer zw.Close()
err = StabilizeZip(zr, zw)
err = StabilizeZip(zr, zw, opts)
if err != nil {
return errors.Wrap(err, "stabilizing zip")
}
Expand All @@ -49,7 +52,7 @@ func Stabilize(dst io.Writer, src io.Reader, f Format) error {
defer gzr.Close()
gzw := gzip.NewWriter(dst)
defer gzw.Close()
err = StabilizeTar(tar.NewReader(gzr), tar.NewWriter(gzw))
err = StabilizeTar(tar.NewReader(gzr), tar.NewWriter(gzw), opts)
if err != nil {
return errors.Wrap(err, "stabilizing tar")
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/archive/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ const (
RawFormat
)

// StabilizeOpts aggregates the stabilizers to be applied during stabilization.
type StabilizeOpts struct {
// Stabilizers is the list of stabilizers to apply. Elements are untyped
// (any); StabilizeTar walks the list in order and applies the elements that
// are TarArchiveStabilizer or TarEntryStabilizer values, skipping elements
// of any other type.
Stabilizers []any
}

// ContentSummary is a summary of rebuild-relevant features of an archive.
type ContentSummary struct {
Files []string
Expand Down
139 changes: 102 additions & 37 deletions pkg/archive/tar.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ import (
"crypto/sha256"
"encoding/hex"
"io"
"io/fs"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"time"

Expand All @@ -35,30 +35,6 @@ import (
// Source: https://github.com/npm/pacote/blob/main/lib/util/tar-create-options.js#L28
var arbitraryTime = time.Date(1985, time.October, 26, 8, 15, 0, 0, time.UTC)

// stabilizeTarHeader builds a fresh header from h that keeps only the fields
// identifying the entry (type, name, link target, size) and replaces volatile
// metadata (timestamps, mode, ownership) with fixed values.
//
// It returns an error for GNU sparse, long-name and long-link entries, which
// are not supported.
func stabilizeTarHeader(h *tar.Header) (*tar.Header, error) {
	switch h.Typeflag {
	case tar.TypeGNUSparse, tar.TypeGNULongName, tar.TypeGNULongLink:
		// NOTE: Non-PAX header type support can be added, if necessary.
		return nil, errors.Errorf("Unsupported header type: %v", h.Typeflag)
	}
	return &tar.Header{
		Typeflag: h.Typeflag,
		Name:     h.Name,
		// Linkname is the target of symlink and hard link entries. It is
		// content, not volatile metadata: dropping it would emit links with an
		// empty target and make entries with different targets indistinguishable.
		Linkname:   h.Linkname,
		ModTime:    arbitraryTime,
		AccessTime: arbitraryTime,
		// TODO: Surface presence/absence of execute bit as a comparison config.
		Mode:  0777,
		Uid:   0,
		Gid:   0,
		Uname: "",
		Gname: "",
		Size:  h.Size,
		// TODO: Surface comparison config for TAR metadata (PAXRecords, Xattrs).
		Format: tar.FormatPAX,
	}, nil
}

// TarEntry represents an entry in a tar archive.
type TarEntry struct {
*tar.Header
Expand All @@ -76,10 +52,90 @@ func (e TarEntry) WriteTo(tw *tar.Writer) error {
return nil
}

// TarArchive is an in-memory view of a tar archive as a list of entries.
type TarArchive struct {
// Files holds the archive's entries. StabilizeTar writes them out in slice
// order, so reordering this slice reorders the output archive.
Files []*TarEntry
}

// TarArchiveStabilizer is a named stabilization pass that operates on a
// whole TarArchive.
type TarArchiveStabilizer struct {
// Name is a short identifier for the pass (e.g. "tar-file-order").
Name string
// Func mutates the archive in place. StabilizeTar calls it once with the
// full archive.
Func func(*TarArchive)
}

// TarEntryStabilizer is a named stabilization pass that operates on a single
// TarEntry.
type TarEntryStabilizer struct {
// Name is a short identifier for the pass (e.g. "tar-time").
Name string
// Func mutates one entry in place. StabilizeTar calls it once for every
// entry in the archive.
Func func(*TarEntry)
}

// AllTarStabilizers is the list of tar stabilizers declared in this file.
// StabilizeTar applies stabilizers in slice order.
var AllTarStabilizers = []any{
	StableTarFileOrder,
	StableTarTime,
	StableTarFileMode,
	StableTarOwners,
	StableTarXattrs,
	StableTarDeviceNumber,
}

// StableTarFileOrder sorts the archive's entries by name using a byte-wise
// string comparison.
//
// The sort is stable: a tar archive may contain several entries with the same
// name, and their relative order must be preserved rather than left to the
// sorting algorithm.
var StableTarFileOrder = TarArchiveStabilizer{
	Name: "tar-file-order",
	Func: func(f *TarArchive) {
		slices.SortStableFunc(f.Files, func(a, b *TarEntry) int {
			return strings.Compare(a.Name, b.Name)
		})
	},
}

// StableTarTime overwrites an entry's timestamps with fixed values: the
// modification and access times become arbitraryTime and the change time is
// reset to the zero time. It also forces the PAX header format.
var StableTarTime = TarEntryStabilizer{
	Name: "tar-time",
	Func: func(ent *TarEntry) {
		var unset time.Time
		ent.ModTime, ent.AccessTime = arbitraryTime, arbitraryTime
		ent.ChangeTime = unset
		// NOTE: Without a PAX record, the tar library will disregard the
		// format below and write the entry as USTAR. Setting 'atime' ensures at
		// least one PAX record exists, which causes the entry to always be
		// treated as PAX.
		ent.Format = tar.FormatPAX
	},
}

// StableTarFileMode replaces an entry's mode with the full set of permission
// bits (fs.ModePerm), discarding whatever mode the entry carried.
var StableTarFileMode = TarEntryStabilizer{
	Name: "tar-file-mode",
	Func: func(ent *TarEntry) {
		const allPerms = int64(fs.ModePerm)
		ent.Mode = allPerms
	},
}

// StableTarOwners erases ownership information from an entry: the numeric
// user and group IDs are zeroed and the user and group names are emptied.
var StableTarOwners = TarEntryStabilizer{
	Name: "tar-owners",
	Func: func(ent *TarEntry) {
		ent.Uid, ent.Gid = 0, 0
		ent.Uname, ent.Gname = "", ""
	},
}

// StableTarXattrs removes every key from an entry's Xattrs and PAXRecords
// maps. The maps are emptied in place rather than replaced.
var StableTarXattrs = TarEntryStabilizer{
	Name: "tar-xattrs",
	Func: func(ent *TarEntry) {
		for _, records := range []map[string]string{ent.Xattrs, ent.PAXRecords} {
			clear(records)
		}
	},
}

// StableTarDeviceNumber zeroes the major and minor device numbers of an entry.
var StableTarDeviceNumber = TarEntryStabilizer{
	Name: "tar-device-number",
	Func: func(ent *TarEntry) {
		// NOTE: 0 is currently reserved on Linux and will dynamically allocate a
		// device number when passed to the kernel.
		ent.Devmajor, ent.Devminor = 0, 0
	},
}

// StabilizeTar strips volatile metadata and re-writes the provided archive in a standard form.
func StabilizeTar(tr *tar.Reader, tw *tar.Writer) error {
func StabilizeTar(tr *tar.Reader, tw *tar.Writer, opts StabilizeOpts) error {
defer tw.Close()
var ents []TarEntry
var ents []*TarEntry
for {
header, err := tr.Next()
if err != nil {
Expand All @@ -88,22 +144,31 @@ func StabilizeTar(tr *tar.Reader, tw *tar.Writer) error {
}
return err
}
stabilized, err := stabilizeTarHeader(header)
if err != nil {
return err
// NOTE: Non-PAX header type support can be added, if necessary.
switch header.Typeflag {
case tar.TypeGNUSparse, tar.TypeGNULongName, tar.TypeGNULongLink:
return errors.New("Unsupported file type")
}
buf, err := io.ReadAll(tr)
if err != nil {
return err
}
// TODO: Memory-intensive. We're buffering the full file in memory (again).
// One option would be to do two passes and only buffer what's necessary.
ents = append(ents, TarEntry{stabilized, buf[:]})
// NOTE: Memory-intensive. We're buffering the full file in memory as
// tar.Reader is single-pass and we need to support sorting entries.
ents = append(ents, &TarEntry{header, buf[:]})
}
f := TarArchive{Files: ents}
for _, s := range opts.Stabilizers {
switch s.(type) {
case TarArchiveStabilizer:
s.(TarArchiveStabilizer).Func(&f)
case TarEntryStabilizer:
for _, ent := range f.Files {
s.(TarEntryStabilizer).Func(ent)
}
}
}
sort.Slice(ents, func(i, j int) bool {
return ents[i].Header.Name < ents[j].Header.Name
})
for _, ent := range ents {
for _, ent := range f.Files {
if err := ent.WriteTo(tw); err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/archive/tar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestStabilizeTar(t *testing.T) {
}
var output bytes.Buffer
zr := tar.NewReader(bytes.NewReader(input.Bytes()))
err := StabilizeTar(zr, tar.NewWriter(&output))
err := StabilizeTar(zr, tar.NewWriter(&output), StabilizeOpts{Stabilizers: AllTarStabilizers})
if err != nil {
t.Fatalf("StabilizeTar(%v) = %v, want nil", tc.test, err)
}
Expand Down
Loading

0 comments on commit 15e6dea

Please sign in to comment.