Skip to content

Commit

Permalink
Support Extended Digest Algorithms (#103)
Browse files Browse the repository at this point in the history
  • Loading branch information
srerickson authored Nov 1, 2024
1 parent 38e5f0e commit c3be774
Show file tree
Hide file tree
Showing 43 changed files with 1,010 additions and 690 deletions.
3 changes: 3 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM mcr.microsoft.com/devcontainers/go:1.3.0-1.23-bookworm
# RUN echo 'deb [trusted=yes] https://repo.goreleaser.com/apt/ /' | sudo tee /etc/apt/sources.list.d/goreleaser.list
# RUN apt-get update && apt-get upgrade -y && apt-get install -y goreleaser
10 changes: 10 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/go
{
"name": "Go",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"dockerComposeFile": "docker-compose.yml",
"service": "devcontainer",
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}"
}

28 changes: 28 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
version: '3.8'
services:
devcontainer:
build:
context: .
dockerfile: Dockerfile
volumes:
- ../..:/workspaces:cached
network_mode: service:minio
command: sleep infinity
environment:
AWS_ACCESS_KEY_ID: ocfltest
AWS_SECRET_ACCESS_KEY: ocfltest
AWS_REGION: us-east-1
OCFL_TEST_S3: "http://minio:9000"

minio:
image: quay.io/minio/minio:RELEASE.2024-09-22T00-33-43Z
restart: unless-stopped
volumes:
- minio-data:/data
command: server /data
environment:
MINIO_ROOT_USER: ocfltest
MINIO_ROOT_PASSWORD: ocfltest

volumes:
minio-data:
250 changes: 8 additions & 242 deletions digest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,244 +2,38 @@ package ocfl

import (
"context"
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"encoding/hex"
"errors"
"fmt"
"hash"
"io"
"iter"
"path"
"runtime"
"strings"
"sync"

"github.com/srerickson/ocfl-go/digest"
"github.com/srerickson/ocfl-go/internal/pipeline"
"golang.org/x/crypto/blake2b"
)

// ErrUnknownAlg reports an unknown digest algorithm name.
var ErrUnknownAlg = errors.New("unknown digest algorithm")

// Names of the digest algorithms with built-in support.
const (
	SHA512  = `sha512`
	SHA256  = `sha256`
	SHA1    = `sha1`
	MD5     = `md5`
	BLAKE2B = `blake2b-512`
)

// builtin maps each built-in algorithm name to its Digester constructor.
var builtin = map[string]func() Digester{
	SHA512:  func() Digester { return newHashDigester(sha512.New()) },
	SHA256:  func() Digester { return newHashDigester(sha256.New()) },
	SHA1:    func() Digester { return newHashDigester(sha1.New()) },
	MD5:     func() Digester { return newHashDigester(md5.New()) },
	BLAKE2B: func() Digester { return newHashDigester(mustBlake2bNew512()) },
}

// register holds Digester constructors added with RegisterAlg.
// All access is guarded by registerMx.
var (
	register   = map[string]func() Digester{}
	registerMx sync.RWMutex
)

// RegisteredAlgs returns a slice of all available digest algorithms
func RegisteredAlgs() []string {
algs := make([]string, 0, len(builtin)+len(register))
for k := range builtin {
algs = append(algs, k)
}
for k := range register {
algs = append(algs, k)
}
return algs
}

// RegisterAlg registers the Digester constructor for alg, so that alg.New()
// can be used. Registration is a no-op if alg names a built-in algorithm or
// was previously registered.
func RegisterAlg(alg string, newDigester func() Digester) {
	// built-in algorithms always win and need no locking
	if builtin[alg] != nil {
		return
	}
	registerMx.Lock()
	defer registerMx.Unlock()
	// first registration for a given name wins
	if register[alg] != nil {
		return
	}
	register[alg] = newDigester
}

// NewDigester returns a new Digester for alg. If no Digester constructor was
// registered for alg — neither built-in nor added with RegisterAlg — it
// returns nil.
func NewDigester(alg string) Digester {
	// built-in algorithms first (no locking needed)
	if construct := builtin[alg]; construct != nil {
		return construct()
	}
	// then algorithms added with RegisterAlg
	registerMx.RLock()
	defer registerMx.RUnlock()
	if construct := register[alg]; construct != nil {
		return construct()
	}
	return nil
}

// Digester is an interface used for generating digest values. Implementations
// accumulate the bytes written to them and report the resulting digest.
type Digester interface {
	io.Writer
	// String() returns the digest value for the bytes written to the digester.
	String() string
}

type hashDigester struct {
hash.Hash
}

func newHashDigester(h hash.Hash) hashDigester {
return hashDigester{Hash: h}
}

func (h hashDigester) String() string { return hex.EncodeToString(h.Sum(nil)) }

// MultiDigester is used to generate digests for multiple digest algorithms at
// the same time. Bytes written to the MultiDigester are fed to every
// underlying Digester.
type MultiDigester struct {
	io.Writer
	// digesters maps algorithm name to the Digester receiving the writes.
	digesters map[string]Digester
}

// NewMultiDigester returns a MultiDigester that computes a digest for each
// recognized algorithm name in algs. Names with no registered Digester are
// silently ignored; if none are recognized, writes are discarded.
func NewMultiDigester(algs ...string) *MultiDigester {
	digesters := make(map[string]Digester, len(algs))
	writers := make([]io.Writer, 0, len(algs))
	for _, alg := range algs {
		digester := NewDigester(alg)
		if digester == nil {
			continue // unrecognized algorithm
		}
		digesters[alg] = digester
		writers = append(writers, digester)
	}
	if len(writers) == 0 {
		return &MultiDigester{Writer: io.Discard}
	}
	return &MultiDigester{
		Writer:    io.MultiWriter(writers...),
		digesters: digesters,
	}
}

// Sum returns the digest value for alg, or the empty string if alg was not
// among the algorithms configured for this MultiDigester.
func (md MultiDigester) Sum(alg string) string {
	digester := md.digesters[alg]
	if digester == nil {
		return ""
	}
	return digester.String()
}

// Sums returns a DigestSet holding the digest value for every algorithm
// configured on the MultiDigester.
func (md MultiDigester) Sums() DigestSet {
	sums := make(DigestSet, len(md.digesters))
	for alg, digester := range md.digesters {
		sums[alg] = digester.String()
	}
	return sums
}

// DigestSet maps digest algorithm names to digest values.
type DigestSet map[string]string

// Add merges the digests in s2 into s. Algorithms not yet in s are copied in;
// for algorithms already present, the two values must match (ignoring case),
// and a *DigestError is returned for the first conflicting value found.
func (s DigestSet) Add(s2 DigestSet) error {
	for alg, next := range s2 {
		prev := s[alg]
		switch {
		case prev == "":
			s[alg] = next
		case strings.EqualFold(prev, next):
			// values agree (case-insensitively); keep the existing entry
		default:
			// digest conflict
			return &DigestError{
				Alg:      alg,
				Got:      next,
				Expected: prev,
			}
		}
	}
	return nil
}

// ConflictWith returns the keys in s whose values do not match (ignoring
// case) the value stored under the same key in other. Keys absent from other
// are not conflicts.
func (s DigestSet) ConflictWith(other DigestSet) []string {
	var conflicts []string
	for alg, mine := range s {
		theirs, ok := other[alg]
		if !ok {
			continue
		}
		if !strings.EqualFold(mine, theirs) {
			conflicts = append(conflicts, alg)
		}
	}
	return conflicts
}

// Validate digests reader and returns an error if the computed digest for any
// algorithm in s doesn't match the value recorded in s. A read failure is
// returned as-is; a mismatch is returned as a *DigestError.
func (s DigestSet) Validate(reader io.Reader) error {
	algs := make([]string, 0, len(s))
	for alg := range s {
		algs = append(algs, alg)
	}
	digester := NewMultiDigester(algs...)
	if _, err := io.Copy(digester, reader); err != nil {
		return err
	}
	results := digester.Sums()
	conflicts := results.ConflictWith(s)
	if len(conflicts) == 0 {
		return nil
	}
	// report the first conflicting algorithm
	alg := conflicts[0]
	return &DigestError{Alg: alg, Expected: s[alg], Got: results[alg]}
}

// DigestError is returned when content's digest conflicts with an expected value.
type DigestError struct {
	Path     string // content path (may be empty)
	Alg      string // digest algorithm name
	Got      string // calculated digest value
	Expected string // expected digest value
}

// Error formats the mismatch, including the content path when one is set.
func (e DigestError) Error() string {
	if e.Path != "" {
		return fmt.Sprintf("unexpected %s for %q: %q, expected=%q", e.Alg, e.Path, e.Got, e.Expected)
	}
	return fmt.Sprintf("unexpected %s value: %q, expected=%q", e.Alg, e.Got, e.Expected)
}

// Digest is equivalent to ConcurrentDigest with the number of digest workers
// set to runtime.NumCPU(). The pathAlgs argument is an iterator that yields
// file paths and a slice of digest algorithms. It returns an iterator that
// yields PathDigests or an error.
func Digest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, []string]) iter.Seq2[PathDigests, error] {
func Digest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, []digest.Algorithm]) iter.Seq2[PathDigests, error] {
return ConcurrentDigest(ctx, fsys, pathAlgs, runtime.NumCPU())
}

// ConcurrentDigest concurrently digests files in an FS. The pathAlgs argument
// is an iterator that yields file paths and a slice of digest algorithms. It
// returns an iterator that yields PathDigests or an error.
func ConcurrentDigest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, []string], numWorkers int) iter.Seq2[PathDigests, error] {
func ConcurrentDigest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, []digest.Algorithm], numWorkers int) iter.Seq2[PathDigests, error] {
// checksum digestJob
type digestJob struct {
path string
algs []string
algs []digest.Algorithm
}
jobsIter := func(yield func(digestJob) bool) {
pathAlgs(func(name string, algs []string) bool {
pathAlgs(func(name string, algs []digest.Algorithm) bool {
return yield(digestJob{path: name, algs: algs})
})
}
runJobs := func(j digestJob) (digests DigestSet, err error) {
runJobs := func(j digestJob) (digests digest.Set, err error) {
f, err := fsys.OpenFile(ctx, j.path)
if err != nil {
return
Expand All @@ -249,7 +43,7 @@ func ConcurrentDigest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, [
err = errors.Join(err, closeErr)
}
}()
digester := NewMultiDigester(j.algs...)
digester := digest.NewMultiDigester(j.algs...)
if _, err = io.Copy(digester, f); err != nil {
return
}
Expand All @@ -273,33 +67,5 @@ func ConcurrentDigest(ctx context.Context, fsys FS, pathAlgs iter.Seq2[string, [
// digests for a file in an FS.
type PathDigests struct {
Path string
Digests DigestSet
}

// Validate validates pd's DigestSet by reading the file at pd.Path, relative
// to the directory parent in fsys. The returned bool is true if the file was
// read and every digest validated; in that case the returned error may still
// be non-nil if an error occurred while closing the file. On a digest
// mismatch, the returned *DigestError has its Path field set to pd.Path.
func (pd PathDigests) Validate(ctx context.Context, fsys FS, parent string) (bool, error) {
	f, err := fsys.OpenFile(ctx, path.Join(parent, pd.Path))
	if err != nil {
		return false, err
	}
	err = pd.Digests.Validate(f)
	if err != nil {
		f.Close()
		// attach the content path to digest mismatches
		var digestErr *DigestError
		if errors.As(err, &digestErr) {
			digestErr.Path = pd.Path
		}
		return false, err
	}
	return true, f.Close()
}

// mustBlake2bNew512 returns a new unkeyed BLAKE2b-512 hash, panicking if the
// constructor reports an error. Used to build the BLAKE2B entry of the
// builtin digester table, where no error can be returned.
func mustBlake2bNew512() hash.Hash {
	h, err := blake2b.New512(nil)
	if err != nil {
		panic("creating new blake2b hash")
	}
	return h
Digests digest.Set
}
Loading

0 comments on commit c3be774

Please sign in to comment.