Skip to content

Commit

Permalink
Don't unnecessarily compute the blob digest in PutBlob
Browse files Browse the repository at this point in the history
Introduce internal/putblobdigest.Digester to encapsulate
the two alternatives, so that the PutBlob implementations only
need to plug it in.

Then use it throughout: let the PutBlob implementations use caller-provided
digest values, if any are provided (and if they use the expected algorithm).

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
  • Loading branch information
mtrmac committed Aug 23, 2021
1 parent a2c13e9 commit bc2f150
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 35 deletions.
4 changes: 2 additions & 2 deletions directory/directory_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"
"runtime"

"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/types"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
Expand Down Expand Up @@ -163,8 +164,7 @@ func (d *dirImageDestination) PutBlob(ctx context.Context, stream io.Reader, inp
}
}()

digester := digest.Canonical.Digester()
stream = io.TeeReader(stream, digester.Hash())
digester, stream := putblobdigest.DigestIfCanonicalUnknown(stream, inputInfo)
// TODO: This can take quite some time, and should ideally be cancellable using ctx.Done().
size, err := io.Copy(blobFile, stream)
if err != nil {
Expand Down
36 changes: 25 additions & 11 deletions directory/directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,46 @@ func TestGetPutManifest(t *testing.T) {
}

func TestGetPutBlob(t *testing.T) {
computedBlob := []byte("test-blob")
providedBlob := []byte("provided-blob")
providedDigest := digest.Digest("sha256:provided-test-digest")

ref, tmpDir := refToTempDir(t)
defer os.RemoveAll(tmpDir)
cache := memory.New()

blob := []byte("test-blob")
dest, err := ref.NewImageDestination(context.Background(), nil)
require.NoError(t, err)
defer dest.Close()
assert.Equal(t, types.PreserveOriginal, dest.DesiredLayerCompression())
info, err := dest.PutBlob(context.Background(), bytes.NewReader(blob), types.BlobInfo{Digest: digest.Digest("sha256:digest-test"), Size: int64(9)}, cache, false)
// PutBlob with caller-provided data
providedInfo, err := dest.PutBlob(context.Background(), bytes.NewReader(providedBlob), types.BlobInfo{Digest: providedDigest, Size: int64(len(providedBlob))}, cache, false)
assert.NoError(t, err)
assert.Equal(t, int64(len(providedBlob)), providedInfo.Size)
assert.Equal(t, providedDigest, providedInfo.Digest)
// PutBlob with unknown data
computedInfo, err := dest.PutBlob(context.Background(), bytes.NewReader(computedBlob), types.BlobInfo{Digest: "", Size: int64(-1)}, cache, false)
assert.NoError(t, err)
assert.Equal(t, int64(len(computedBlob)), computedInfo.Size)
assert.Equal(t, digest.FromBytes(computedBlob), computedInfo.Digest)
err = dest.Commit(context.Background(), nil) // nil unparsedToplevel is invalid, we don’t currently use the value
assert.NoError(t, err)
assert.Equal(t, int64(9), info.Size)
assert.Equal(t, digest.FromBytes(blob), info.Digest)

src, err := ref.NewImageSource(context.Background(), nil)
require.NoError(t, err)
defer src.Close()
rc, size, err := src.GetBlob(context.Background(), info, cache)
assert.NoError(t, err)
defer rc.Close()
b, err := ioutil.ReadAll(rc)
assert.NoError(t, err)
assert.Equal(t, blob, b)
assert.Equal(t, int64(len(blob)), size)
for digest, expectedBlob := range map[digest.Digest][]byte{
providedInfo.Digest: providedBlob,
computedInfo.Digest: computedBlob,
} {
rc, size, err := src.GetBlob(context.Background(), types.BlobInfo{Digest: digest, Size: int64(len(expectedBlob))}, cache)
assert.NoError(t, err)
defer rc.Close()
b, err := ioutil.ReadAll(rc)
assert.NoError(t, err)
assert.Equal(t, expectedBlob, b)
assert.Equal(t, int64(len(expectedBlob)), size)
}
}

// readerFromFunc allows implementing Reader by any function, e.g. a closure.
Expand Down
4 changes: 2 additions & 2 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/iolimits"
"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/internal/uploadreader"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/pkg/blobinfocache/none"
Expand Down Expand Up @@ -161,8 +162,7 @@ func (d *dockerImageDestination) PutBlob(ctx context.Context, stream io.Reader,
return types.BlobInfo{}, errors.Wrap(err, "determining upload URL")
}

digester := digest.Canonical.Digester()
stream = io.TeeReader(stream, digester.Hash())
digester, stream := putblobdigest.DigestIfCanonicalUnknown(stream, inputInfo)
sizeCounter := &sizeCounter{}
stream = io.TeeReader(stream, sizeCounter)

Expand Down
8 changes: 3 additions & 5 deletions docker/internal/tarfile/dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/iolimits"
"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/internal/tmpdir"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/types"
Expand Down Expand Up @@ -104,8 +105,7 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t
defer os.Remove(streamCopy.Name())
defer streamCopy.Close()

digester := digest.Canonical.Digester()
stream2 := io.TeeReader(stream, digester.Hash())
digester, stream2 := putblobdigest.DigestIfUnknown(stream, inputInfo)
// TODO: This can take quite some time, and should ideally be cancellable using ctx.Done().
size, err := io.Copy(streamCopy, stream2)
if err != nil {
Expand All @@ -116,9 +116,7 @@ func (d *Destination) PutBlob(ctx context.Context, stream io.Reader, inputInfo t
return types.BlobInfo{}, err
}
inputInfo.Size = size // inputInfo is a struct, so we are only modifying our copy.
if inputInfo.Digest == "" {
inputInfo.Digest = digester.Digest()
}
inputInfo.Digest = digester.Digest()
stream = streamCopy
logrus.Debugf("... streaming done")
}
Expand Down
57 changes: 57 additions & 0 deletions internal/putblobdigest/put_blob_digest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package putblobdigest

import (
"io"

"github.com/containers/image/v5/types"
"github.com/opencontainers/go-digest"
)

// Digester computes a digest of the provided stream, if not known yet.
// Exactly one of knownDigest and digester is set by the constructors in this
// package: a caller-supplied digest is recorded verbatim, otherwise a
// digest.Canonical digester is attached to the stream.
type Digester struct {
	knownDigest digest.Digest // Or ""; returned as-is by Digest when set.
	digester digest.Digester // Or nil; computes digest.Canonical over the stream.
}

// newDigester initiates computation of a digest.Canonical digest of stream,
// if !validDigest; otherwise it just records knownDigest to be returned later.
// The caller MUST use the returned stream instead of the original value.
func newDigester(stream io.Reader, knownDigest digest.Digest, validDigest bool) (Digester, io.Reader) {
	if validDigest {
		// Nothing to compute; just hand the caller's digest back later.
		return Digester{knownDigest: knownDigest}, stream
	}
	// No usable digest was provided: tee the stream through a canonical
	// digester so Digest() can report the computed value after the read.
	res := Digester{
		digester: digest.Canonical.Digester(),
	}
	stream = io.TeeReader(stream, res.digester.Hash())
	return res, stream
}

// DigestIfUnknown initiates computation of a digest.Canonical digest of stream,
// if no digest is supplied in the provided blobInfo; otherwise blobInfo.Digest
// will be used (accepting any algorithm).
// The caller MUST use the returned stream instead of the original value.
func DigestIfUnknown(stream io.Reader, blobInfo types.BlobInfo) (Digester, io.Reader) {
	known := blobInfo.Digest != ""
	return newDigester(stream, blobInfo.Digest, known)
}

// DigestIfCanonicalUnknown initiates computation of a digest.Canonical digest
// of stream, if a digest.Canonical digest is not supplied in the provided
// blobInfo; otherwise blobInfo.Digest will be used.
// The caller MUST use the returned stream instead of the original value.
func DigestIfCanonicalUnknown(stream io.Reader, blobInfo types.BlobInfo) (Digester, io.Reader) {
	provided := blobInfo.Digest
	usable := provided != "" && provided.Algorithm() == digest.Canonical
	return newDigester(stream, provided, usable)
}

// Digest returns a digest value possibly computed by Digester.
// This must be called only after all of the stream returned by a Digester
// constructor has been successfully read (otherwise the computed digest
// would cover only a prefix of the data).
func (d Digester) Digest() digest.Digest {
	if d.digester != nil {
		return d.digester.Digest()
	}
	return d.knownDigest
}
75 changes: 75 additions & 0 deletions internal/putblobdigest/put_blob_digest_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package putblobdigest

import (
"bytes"
"io"
"io/ioutil"
"testing"

"github.com/containers/image/v5/types"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// testData is the payload read through every Digester in the tests below.
var testData = []byte("test data")

// testCase describes one input/expectation pair for testDigester.
type testCase struct {
	inputDigest digest.Digest // Digest supplied in the BlobInfo, or ""
	computesDigest bool // Whether the constructor is expected to wrap the stream
	expectedDigest digest.Digest // Expected Digester.Digest() result
}

// testDigester runs constructor (one of DigestIfUnknown / DigestIfCanonicalUnknown)
// over testData for each case in cases, verifying both whether a digest is
// being computed (detected by the stream being wrapped) and the final
// Digester.Digest() value.
func testDigester(t *testing.T, constructor func(io.Reader, types.BlobInfo) (Digester, io.Reader),
	cases []testCase) {
	for _, c := range cases {
		stream := bytes.NewReader(testData)
		digester, newStream := constructor(stream, types.BlobInfo{Digest: c.inputDigest})
		assert.Equal(t, c.computesDigest, newStream != stream, c.inputDigest)
		data, err := ioutil.ReadAll(newStream)
		require.NoError(t, err, c.inputDigest)
		assert.Equal(t, testData, data, c.inputDigest)
		// Use a name that does not shadow the imported digest package.
		actualDigest := digester.Digest()
		assert.Equal(t, c.expectedDigest, actualDigest, c.inputDigest)
	}
}

func TestDigestIfUnknown(t *testing.T) {
testDigester(t, DigestIfUnknown, []testCase{
{
inputDigest: digest.Digest("sha256:uninspected-value"),
computesDigest: false,
expectedDigest: digest.Digest("sha256:uninspected-value"),
},
{
inputDigest: digest.Digest("unknown-algorithm:uninspected-value"),
computesDigest: false,
expectedDigest: digest.Digest("unknown-algorithm:uninspected-value"),
},
{
inputDigest: "",
computesDigest: true,
expectedDigest: digest.Canonical.FromBytes(testData),
},
})
}

func TestDigestIfCanonicalUnknown(t *testing.T) {
testDigester(t, DigestIfCanonicalUnknown, []testCase{
{
inputDigest: digest.Digest("sha256:uninspected-value"),
computesDigest: false,
expectedDigest: digest.Digest("sha256:uninspected-value"),
},
{
inputDigest: digest.Digest("unknown-algorithm:uninspected-value"),
computesDigest: true,
expectedDigest: digest.Canonical.FromBytes(testData),
},
{
inputDigest: "",
computesDigest: true,
expectedDigest: digest.Canonical.FromBytes(testData),
},
})
}
4 changes: 2 additions & 2 deletions oci/layout/oci_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"
"runtime"

"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/types"
digest "github.com/opencontainers/go-digest"
Expand Down Expand Up @@ -138,8 +139,7 @@ func (d *ociImageDestination) PutBlob(ctx context.Context, stream io.Reader, inp
}
}()

digester := digest.Canonical.Digester()
stream = io.TeeReader(stream, digester.Hash())
digester, stream := putblobdigest.DigestIfCanonicalUnknown(stream, inputInfo)
// TODO: This can take quite some time, and should ideally be cancellable using ctx.Done().
size, err := io.Copy(blobFile, stream)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions ostree/ostree_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"time"
"unsafe"

"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/types"
"github.com/containers/storage/pkg/archive"
Expand Down Expand Up @@ -159,8 +160,7 @@ func (d *ostreeImageDestination) PutBlob(ctx context.Context, stream io.Reader,
}
defer blobFile.Close()

digester := digest.Canonical.Digester()
stream = io.TeeReader(stream, digester.Hash())
digester, stream := putblobdigest.DigestIfCanonicalUnknown(stream, inputInfo)
// TODO: This can take quite some time, and should ideally be cancellable using ctx.Done().
size, err := io.Copy(blobFile, stream)
if err != nil {
Expand Down
14 changes: 3 additions & 11 deletions storage/storage_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/image"
"github.com/containers/image/v5/internal/putblobdigest"
"github.com/containers/image/v5/internal/tmpdir"
internalTypes "github.com/containers/image/v5/internal/types"
"github.com/containers/image/v5/manifest"
Expand Down Expand Up @@ -492,10 +493,6 @@ func (s *storageImageDestination) PutBlob(ctx context.Context, stream io.Reader,
}

// Set up to digest the blob if necessary, and count its size while saving it to a file.
var hasher digest.Digester // = nil when we don't need to compute the blob digest
if blobinfo.Digest == "" {
hasher = digest.Canonical.Digester()
}
filename := s.computeNextBlobCacheFile()
file, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY|os.O_EXCL, 0600)
if err != nil {
Expand All @@ -504,9 +501,7 @@ func (s *storageImageDestination) PutBlob(ctx context.Context, stream io.Reader,
defer file.Close()
counter := ioutils.NewWriteCounter(file)
stream = io.TeeReader(stream, counter)
if hasher != nil {
stream = io.TeeReader(stream, hasher.Hash())
}
digester, stream := putblobdigest.DigestIfUnknown(stream, blobinfo)
decompressed, err := archive.DecompressStream(stream)
if err != nil {
return errorBlobInfo, errors.Wrap(err, "setting up to decompress blob")
Expand All @@ -523,10 +518,7 @@ func (s *storageImageDestination) PutBlob(ctx context.Context, stream io.Reader,

// Determine blob properties, and fail if information that we were given about the blob
// is known to be incorrect.
blobDigest := blobinfo.Digest
if hasher != nil {
blobDigest = hasher.Digest()
}
blobDigest := digester.Digest()
blobSize := blobinfo.Size
if blobSize < 0 {
blobSize = counter.Count
Expand Down

0 comments on commit bc2f150

Please sign in to comment.