-
Notifications
You must be signed in to change notification settings - Fork 247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chunked: do not use a temporary file #2041
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ import ( | |
"github.com/containers/storage/pkg/chunked/toc" | ||
"github.com/containers/storage/pkg/fsverity" | ||
"github.com/containers/storage/pkg/idtools" | ||
"github.com/containers/storage/pkg/ioutils" | ||
"github.com/containers/storage/pkg/system" | ||
jsoniter "github.com/json-iterator/go" | ||
"github.com/klauspost/compress/zstd" | ||
|
@@ -107,7 +108,7 @@ type chunkedLayerData struct { | |
Format graphdriver.DifferOutputFormat `json:"format"` | ||
} | ||
|
||
func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) { | ||
func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload io.Reader) (int64, *seekableFile, digest.Digest, map[string]string, error) { | ||
diff, err := archive.DecompressStream(payload) | ||
if err != nil { | ||
return 0, nil, "", nil, err | ||
|
@@ -1076,7 +1077,7 @@ func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) { | |
return new, nil | ||
} | ||
|
||
func (c *chunkedDiffer) copyAllBlobToFile(destination *os.File) (digest.Digest, error) { | ||
func (c *chunkedDiffer) requestWholeBlob() (io.ReadCloser, digest.Digester, error) { | ||
var payload io.ReadCloser | ||
var streams chan io.ReadCloser | ||
var errs chan error | ||
|
@@ -1091,26 +1092,27 @@ func (c *chunkedDiffer) copyAllBlobToFile(destination *os.File) (digest.Digest, | |
|
||
streams, errs, err = c.stream.GetBlobAt(chunksToRequest) | ||
if err != nil { | ||
return "", err | ||
return nil, nil, err | ||
} | ||
select { | ||
case p := <-streams: | ||
payload = p | ||
case err := <-errs: | ||
return "", err | ||
return nil, nil, err | ||
} | ||
if payload == nil { | ||
return "", errors.New("invalid stream returned") | ||
return nil, nil, errors.New("invalid stream returned") | ||
} | ||
|
||
originalRawDigester := digest.Canonical.Digester() | ||
digester := digest.Canonical.Digester() | ||
|
||
r := io.TeeReader(payload, originalRawDigester.Hash()) | ||
r := io.TeeReader(payload, digester.Hash()) | ||
|
||
// copy the entire tarball and compute its digest | ||
_, err = io.CopyBuffer(destination, r, c.copyBuffer) | ||
rc := ioutils.NewReadCloserWrapper(r, func() error { | ||
return payload.Close() | ||
}) | ||
|
||
return originalRawDigester.Digest(), err | ||
return rc, digester, nil | ||
} | ||
|
||
func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) { | ||
|
@@ -1131,32 +1133,17 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff | |
var convertedBlobSize int64 | ||
|
||
if c.convertToZstdChunked { | ||
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600) | ||
payload, digester, err := c.requestWholeBlob() | ||
if err != nil { | ||
return graphdriver.DriverWithDifferOutput{}, &fs.PathError{Op: "open", Path: dest, Err: err} | ||
return graphdriver.DriverWithDifferOutput{}, err | ||
} | ||
blobFile := os.NewFile(uintptr(fd), "blob-file") | ||
defer func() { | ||
if blobFile != nil { | ||
blobFile.Close() | ||
if payload != nil { | ||
payload.Close() | ||
} | ||
}() | ||
|
||
// calculate the checksum before accessing the file. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Presumably we were doing this for a reason (security?) before...is it just that we think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm I don't remember the security implications, @mtrmac do you think this approach is fine or should I just close this PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On balance, I’d prefer this not to be merged as is. This exposes the conversion code and decompression to malicious input in more situations. In principle, the conversion code and decompression must be able to handle malicious input anyway, because an attacker could, in many cases, refer to the malicious input in a valid manifest, without triggering this check. But some users might have a policy which only accepts signed images, i.e. the malicious input would only be digested, not processed otherwise. I’m really more worried about the decompression than the chunked conversion. That’s a lot of bit manipulation, potentially in an unsafe language “for performance”, with a fair history of vulnerabilities. … and for ordinary image pulls, we currently also stream the input through decompression, only validating the digest concurrently; we don’t validate the digest before starting to decompress. So, in that sense, we are already accepting a larger part of the risk. So I don’t feel that strongly about it here. More importantly, if this is aiming at the c/storage So, if the goal is code structure, not performance or If you don’t actually care about the performance at this point, we get the increased risk but not any benefit we value — so I’d prefer to close the PR and not merge this; we can always resurrect it later. Except for the one line that allows |
||
compressedDigest, err = c.copyAllBlobToFile(blobFile) | ||
if err != nil { | ||
return graphdriver.DriverWithDifferOutput{}, err | ||
} | ||
|
||
if compressedDigest != c.blobDigest { | ||
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("invalid digest to convert: expected %q, got %q", c.blobDigest, compressedDigest) | ||
} | ||
|
||
if _, err := blobFile.Seek(0, io.SeekStart); err != nil { | ||
return graphdriver.DriverWithDifferOutput{}, err | ||
} | ||
|
||
tarSize, fileSource, diffID, annotations, err := c.convertTarToZstdChunked(dest, blobFile) | ||
tarSize, fileSource, diffID, annotations, err := c.convertTarToZstdChunked(dest, payload) | ||
if err != nil { | ||
return graphdriver.DriverWithDifferOutput{}, err | ||
} | ||
|
@@ -1165,9 +1152,15 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff | |
// need to keep it open until the entire file is processed. | ||
defer fileSource.Close() | ||
|
||
// Close the file so that the file descriptor is released and the file is deleted. | ||
blobFile.Close() | ||
blobFile = nil | ||
// Make sure the entire payload is consumed. | ||
_, _ = io.Copy(io.Discard, payload) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we still check for errors here? Maybe it doesn't matter because if e.g. we get a short read or something we'll presumably fail checksum validation anyways. If that's the idea then I think it at least deserves a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've not measured performance, this is just a preparatory work for extending I've not yet found a nice way to extend the API though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes (if this goes in) I think it would be better to report a read error (the cause) instead of a digest mismatch that is hard to diagnose. |
||
payload.Close() | ||
payload = nil | ||
|
||
compressedDigest = digester.Digest() | ||
if compressedDigest != c.blobDigest { | ||
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("invalid digest to convert: expected %q, got %q", c.blobDigest, compressedDigest) | ||
} | ||
|
||
tocDigest, err := toc.GetTOCDigest(annotations) | ||
if err != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We were not closing
payload
before? That fix should not be forgotten.