diff --git a/internal/verifier/attestation.go b/internal/verifier/attestation.go index 7f880fae..31ce2b33 100644 --- a/internal/verifier/attestation.go +++ b/internal/verifier/attestation.go @@ -68,6 +68,7 @@ func CreateAttestations(ctx context.Context, input rebuild.Input, finalStrategy // TODO: Include build repository associated with this builder. } publicRebuildURI := path.Join("rebuild", buildInfo.Target.Artifact) + // TODO: Change from "normalized" to "stabilized". publicNormalizedURI := path.Join("normalized", buildInfo.Target.Artifact) // Create comparison attestation. eqStmt := &in_toto.ProvenanceStatementSLSA1{ diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index b9239a6d..81c9a502 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -23,8 +23,11 @@ import ( "github.com/pkg/errors" ) +var AllStabilizers = append(AllZipStabilizers, AllTarStabilizers...) + // Stabilize selects and applies the stabilization routine for the given archive format. func Stabilize(dst io.Writer, src io.Reader, f Format) error { + opts := StabilizeOpts{Stabilizers: AllStabilizers} switch f { case ZipFormat: srcReader, size, err := toZipCompatibleReader(src) @@ -37,7 +40,7 @@ func Stabilize(dst io.Writer, src io.Reader, f Format) error { } zw := zip.NewWriter(dst) defer zw.Close() - err = StabilizeZip(zr, zw) + err = StabilizeZip(zr, zw, opts) if err != nil { return errors.Wrap(err, "stabilizing zip") } @@ -49,7 +52,7 @@ func Stabilize(dst io.Writer, src io.Reader, f Format) error { defer gzr.Close() gzw := gzip.NewWriter(dst) defer gzw.Close() - err = StabilizeTar(tar.NewReader(gzr), tar.NewWriter(gzw)) + err = StabilizeTar(tar.NewReader(gzr), tar.NewWriter(gzw), opts) if err != nil { return errors.Wrap(err, "stabilizing tar") } diff --git a/pkg/archive/common.go b/pkg/archive/common.go index e4893db2..1776241f 100644 --- a/pkg/archive/common.go +++ b/pkg/archive/common.go @@ -27,6 +27,11 @@ const ( RawFormat ) +// StabilizeOpts aggregates sanitizers to be used in stabilization. +type StabilizeOpts struct { + Stabilizers []any +} + // ContentSummary is a summary of rebuild-relevant features of an archive. type ContentSummary struct { Files []string diff --git a/pkg/archive/tar.go b/pkg/archive/tar.go index d0b76bc0..fdb6a334 100644 --- a/pkg/archive/tar.go +++ b/pkg/archive/tar.go @@ -20,10 +20,10 @@ import ( "crypto/sha256" "encoding/hex" "io" + "io/fs" "os" "path/filepath" "slices" - "sort" "strings" "time" @@ -35,30 +35,6 @@ import ( // Source: https://github.com/npm/pacote/blob/main/lib/util/tar-create-options.js#L28 var arbitraryTime = time.Date(1985, time.October, 26, 8, 15, 0, 0, time.UTC) -func stabilizeTarHeader(h *tar.Header) (*tar.Header, error) { - switch h.Typeflag { - case tar.TypeGNUSparse, tar.TypeGNULongName, tar.TypeGNULongLink: - // NOTE: Non-PAX header type support can be added, if necessary. - return nil, errors.Errorf("Unsupported header type: %v", h.Typeflag) - default: - return &tar.Header{ - Typeflag: h.Typeflag, - Name: h.Name, - ModTime: arbitraryTime, - AccessTime: arbitraryTime, - // TODO: Surface presence/absence of execute bit as a comparison config. - Mode: 0777, - Uid: 0, - Gid: 0, - Uname: "", - Gname: "", - Size: h.Size, - // TODO: Surface comparison config for TAR metadata (PAXRecords, Xattrs). - Format: tar.FormatPAX, - }, nil - } -} - // TarEntry represents an entry in a tar archive. type TarEntry struct { *tar.Header @@ -76,10 +52,90 @@ func (e TarEntry) WriteTo(tw *tar.Writer) error { return nil } +type TarArchive struct { + Files []*TarEntry +} + +type TarArchiveStabilizer struct { + Name string + Func func(*TarArchive) +} + +type TarEntryStabilizer struct { + Name string + Func func(*TarEntry) +} + +var AllTarStabilizers []any = []any{ + StableTarFileOrder, + StableTarTime, + StableTarFileMode, + StableTarOwners, + StableTarXattrs, + StableTarDeviceNumber, +} + +var StableTarFileOrder = TarArchiveStabilizer{ + Name: "tar-file-order", + Func: func(f *TarArchive) { + slices.SortFunc(f.Files, func(a, b *TarEntry) int { + return strings.Compare(a.Name, b.Name) + }) + }, +} + +var StableTarTime = TarEntryStabilizer{ + Name: "tar-time", + Func: func(e *TarEntry) { + e.ModTime = arbitraryTime + e.AccessTime = arbitraryTime + e.ChangeTime = time.Time{} + // NOTE: Without a PAX record, the tar library will disregard this value + // and write the format as USTAR. Setting 'atime' ensures at least one + // PAX record exists which will cause tar to be always be considered a PAX. + e.Format = tar.FormatPAX + }, +} + +var StableTarFileMode = TarEntryStabilizer{ + Name: "tar-file-mode", + Func: func(e *TarEntry) { + e.Mode = int64(fs.ModePerm) + }, +} + +var StableTarOwners = TarEntryStabilizer{ + Name: "tar-owners", + Func: func(e *TarEntry) { + e.Uid = 0 + e.Gid = 0 + e.Uname = "" + e.Gname = "" + }, +} + +var StableTarXattrs = TarEntryStabilizer{ + Name: "tar-xattrs", + Func: func(e *TarEntry) { + clear(e.Xattrs) + clear(e.PAXRecords) + }, +} + +var StableTarDeviceNumber = TarEntryStabilizer{ + Name: "tar-device-number", + Func: func(e *TarEntry) { + // NOTE: 0 is currently reserved on Linux and will dynamically allocate a + // device number when passed to the kernel. + e.Devmajor = 0 + e.Devminor = 0 + }, +} + // StabilizeTar strips volatile metadata and re-writes the provided archive in a standard form. -func StabilizeTar(tr *tar.Reader, tw *tar.Writer) error { +func StabilizeTar(tr *tar.Reader, tw *tar.Writer, opts StabilizeOpts) error { defer tw.Close() - var ents []TarEntry + var ents []*TarEntry for { header, err := tr.Next() if err != nil { @@ -88,22 +144,31 @@ func StabilizeTar(tr *tar.Reader, tw *tar.Writer) error { } return err } - stabilized, err := stabilizeTarHeader(header) - if err != nil { - return err + // NOTE: Non-PAX header type support can be added, if necessary. + switch header.Typeflag { + case tar.TypeGNUSparse, tar.TypeGNULongName, tar.TypeGNULongLink: + return errors.New("Unsupported file type") } buf, err := io.ReadAll(tr) if err != nil { return err } - // TODO: Memory-intensive. We're buffering the full file in memory (again). - // One option would be to do two passes and only buffer what's necessary. - ents = append(ents, TarEntry{stabilized, buf[:]}) + // NOTE: Memory-intensive. We're buffering the full file in memory as + // tar.Reader is single-pass and we need to support sorting entries. + ents = append(ents, &TarEntry{header, buf[:]}) + } + f := TarArchive{Files: ents} + for _, s := range opts.Stabilizers { + switch s.(type) { + case TarArchiveStabilizer: + s.(TarArchiveStabilizer).Func(&f) + case TarEntryStabilizer: + for _, ent := range f.Files { + s.(TarEntryStabilizer).Func(ent) + } + } } - sort.Slice(ents, func(i, j int) bool { - return ents[i].Header.Name < ents[j].Header.Name - }) - for _, ent := range ents { + for _, ent := range f.Files { if err := ent.WriteTo(tw); err != nil { return err } diff --git a/pkg/archive/tar_test.go b/pkg/archive/tar_test.go index e4610b8f..1095af29 100644 --- a/pkg/archive/tar_test.go +++ b/pkg/archive/tar_test.go @@ -77,7 +77,7 @@ func TestStabilizeTar(t *testing.T) { } var output bytes.Buffer zr := tar.NewReader(bytes.NewReader(input.Bytes())) - err := StabilizeTar(zr, tar.NewWriter(&output)) + err := StabilizeTar(zr, tar.NewWriter(&output), StabilizeOpts{Stabilizers: AllTarStabilizers}) if err != nil { t.Fatalf("StabilizeTar(%v) = %v, want nil", tc.test, err) } diff --git a/pkg/archive/zip.go b/pkg/archive/zip.go index 7b9c5cf9..aa99fd50 100644 --- a/pkg/archive/zip.go +++ b/pkg/archive/zip.go @@ -20,7 +20,8 @@ import ( "crypto/sha256" "encoding/hex" "io" - "sort" + "slices" + "strings" "time" "github.com/pkg/errors" @@ -51,6 +52,7 @@ func NewContentSummaryFromZip(zr *zip.Reader) (*ContentSummary, error) { } // ZipEntry represents an entry in a zip archive. +// TODO: Move to archivetest. type ZipEntry struct { *zip.FileHeader Body []byte @@ -68,40 +70,164 @@ func (e ZipEntry) WriteTo(zw *zip.Writer) error { return nil } -// StabilizeZip strips volatile metadata and rewrites the provided archive in a standard form. -func StabilizeZip(zr *zip.Reader, zw *zip.Writer) error { - defer zw.Close() - var ents []ZipEntry - for _, f := range zr.File { - r, err := f.Open() +// MutableZipFile wraps zip.File to allow in-place modification of the original. +type MutableZipFile struct { + zip.FileHeader + File *zip.File + mutContent []byte +} + +func (mf *MutableZipFile) Open() (io.Reader, error) { + if mf.mutContent != nil { + return bytes.NewReader(mf.mutContent), nil + } + return mf.File.Open() +} + +func (mf *MutableZipFile) SetContent(content []byte) { + mf.mutContent = content +} + +// MutableZipReader wraps zip.Reader to allow in-place modification of the original. +type MutableZipReader struct { + *zip.Reader + File []*MutableZipFile + Comment string +} + +func NewMutableReader(zr *zip.Reader) MutableZipReader { + mr := MutableZipReader{Reader: zr} + mr.Comment = mr.Reader.Comment + for _, zf := range zr.File { + mr.File = append(mr.File, &MutableZipFile{File: zf, FileHeader: zf.FileHeader}) + } + return mr +} + +func (mr MutableZipReader) WriteTo(zw *zip.Writer) error { + if err := zw.SetComment(mr.Comment); err != nil { + return err + } + for _, mf := range mr.File { + r, err := mf.Open() if err != nil { return err } - b, err := io.ReadAll(r) + w, err := zw.CreateHeader(&mf.FileHeader) if err != nil { - r.Close() return err } - if err := r.Close(); err != nil { + if _, err := io.Copy(w, r); err != nil { return err } - // TODO: Memory-intensive. We're buffering the full file in memory (again). - // One option would be to do two passes and only buffer what's necessary. - ents = append(ents, ZipEntry{&zip.FileHeader{Name: f.Name, Modified: time.UnixMilli(0)}, b}) } - sort.Slice(ents, func(i, j int) bool { - return ents[i].FileHeader.Name < ents[j].FileHeader.Name - }) - for _, ent := range ents { - w, err := zw.CreateHeader(ent.FileHeader) - if err != nil { - return err - } - if _, err := io.Copy(w, bytes.NewReader(ent.Body)); err != nil { - return err + return nil +} + +type ZipArchiveStabilizer struct { + Name string + Func func(*MutableZipReader) +} + +type ZipEntryStabilizer struct { + Name string + Func func(*MutableZipFile) +} + +var AllZipStabilizers []any = []any{ + StableZipFileOrder, + StableZipModifiedTime, + StableZipCompression, + StableZipDataDescriptor, + StableZipFileEncoding, + StableZipFileMode, + StableZipMisc, +} + +var StableZipFileOrder = ZipArchiveStabilizer{ + Name: "zip-file-order", + Func: func(zr *MutableZipReader) { + slices.SortFunc(zr.File, func(i, j *MutableZipFile) int { + return strings.Compare(i.Name, j.Name) + }) + }, +} + +var StableZipModifiedTime = ZipEntryStabilizer{ + Name: "zip-modified-time", + Func: func(zf *MutableZipFile) { + zf.Modified = time.UnixMilli(0) + zf.ModifiedDate = 0 + zf.ModifiedTime = 0 + }, +} + +var StableZipCompression = ZipEntryStabilizer{ + Name: "zip-compression", + Func: func(zf *MutableZipFile) { + zf.Method = zip.Store + }, +} + +var dataDescriptorFlag = uint16(0x8) + +var StableZipDataDescriptor = ZipEntryStabilizer{ + Name: "zip-data-descriptor", + Func: func(zf *MutableZipFile) { + zf.Flags = zf.Flags & ^dataDescriptorFlag + zf.CRC32 = 0 + zf.CompressedSize = 0 + zf.CompressedSize64 = 0 + zf.UncompressedSize = 0 + zf.UncompressedSize64 = 0 + }, +} + +var StableZipFileEncoding = ZipEntryStabilizer{ + Name: "zip-file-encoding", + Func: func(zf *MutableZipFile) { + zf.NonUTF8 = false + }, +} + +var StableZipFileMode = ZipEntryStabilizer{ + Name: "zip-file-mode", + Func: func(zf *MutableZipFile) { + zf.CreatorVersion = 0 + zf.ExternalAttrs = 0 + }, +} + +var StableZipMisc = ZipEntryStabilizer{ + Name: "zip-misc", + Func: func(zf *MutableZipFile) { + zf.Comment = "" + zf.ReaderVersion = 0 + zf.Extra = []byte{} + // NOTE: Zero all flags except the data descriptor one handled above. + zf.Flags = zf.Flags & dataDescriptorFlag + }, +} + +// StabilizeZip strips volatile metadata and rewrites the provided archive in a standard form. +func StabilizeZip(zr *zip.Reader, zw *zip.Writer, opts StabilizeOpts) error { + defer zw.Close() + var headers []zip.FileHeader + for _, zf := range zr.File { + headers = append(headers, zf.FileHeader) + } + mr := NewMutableReader(zr) + for _, s := range opts.Stabilizers { + switch s.(type) { + case ZipArchiveStabilizer: + s.(ZipArchiveStabilizer).Func(&mr) + case ZipEntryStabilizer: + for _, mf := range mr.File { + s.(ZipEntryStabilizer).Func(mf) + } } } - return nil + return mr.WriteTo(zw) } // toZipCompatibleReader coerces an io.Reader into an io.ReaderAt required to construct a zip.Reader. diff --git a/pkg/archive/zip_test.go b/pkg/archive/zip_test.go index f24c8c2e..cdaee9cd 100644 --- a/pkg/archive/zip_test.go +++ b/pkg/archive/zip_test.go @@ -83,7 +83,7 @@ func TestStabilizeZip(t *testing.T) { } var output bytes.Buffer zr := must(zip.NewReader(bytes.NewReader(input.Bytes()), int64(input.Len()))) - err := StabilizeZip(zr, zip.NewWriter(&output)) + err := StabilizeZip(zr, zip.NewWriter(&output), StabilizeOpts{Stabilizers: AllZipStabilizers}) if err != nil { t.Fatalf("StabilizeZip(%v) = %v, want nil", tc.test, err) }