vendor: bump c/common and other vendors

This commit bumps majorly c/common so netavark features could be synced with podman. But there are some other vendor bumps as well [NO NEW TESTS NEEDED] [NO TESTS NEEDED] Signed-off-by: Aditya R <arajan@redhat.com>
author: Aditya R <arajan@redhat.com> 2022-01-20 12:40:07 +0530
committer: Aditya R <arajan@redhat.com> 2022-01-20 12:40:11 +0530
commit: 2c492be00a13bfbc389d2b1b97c6bf91520e280e (patch)
tree: a0603d66b29dcc9ab91354ef583ba5e349f8409f /vendor/github.com/containers/storage/pkg
parent: f46478c1e9af601759e341de76d4c655b4a66068 (diff)
7 files changed, 1704 insertions, 352 deletions
diff --git a/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go b/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go
new file mode 100644
index 000000000..a931fb5d1
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go
@@ -0,0 +1,630 @@
+package chunked
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+	"unsafe"
+
+	storage "github.com/containers/storage"
+	"github.com/containers/storage/pkg/chunked/internal"
+	"github.com/containers/storage/pkg/ioutils"
+	jsoniter "github.com/json-iterator/go"
+	digest "github.com/opencontainers/go-digest"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	cacheKey     = "chunked-manifest-cache"
+	cacheVersion = 1
+)
+
+type metadata struct {
+	tagLen    int
+	digestLen int
+	tags      []byte
+	vdata     []byte
+}
+
+type layer struct {
+	id       string
+	metadata *metadata
+	target   string
+}
+
+type layersCache struct {
+	layers  []layer
+	refs    int
+	store   storage.Store
+	mutex   sync.RWMutex
+	created time.Time
+}
+
+var cacheMutex sync.Mutex
+var cache *layersCache
+
+func (c *layersCache) release() {
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock()
+
+	c.refs--
+	if c.refs == 0 {
+		cache = nil
+	}
+}
+
+func getLayersCacheRef(store storage.Store) *layersCache {
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock()
+	if cache != nil && cache.store == store && time.Since(cache.created).Minutes() < 10 {
+		cache.refs++
+		return cache
+	}
+	cache := &layersCache{
+		store:   store,
+		refs:    1,
+		created: time.Now(),
+	}
+	return cache
+}
+
+func getLayersCache(store storage.Store) (*layersCache, error) {
+	c := getLayersCacheRef(store)
+
+	if err := c.load(); err != nil {
+		c.release()
+		return nil, err
+	}
+	return c, nil
+}
+
+func (c *layersCache) load() error {
+	c.mutex.Lock()
+	defer c.mutex.Unlock()
+
+	allLayers, err := c.store.Layers()
+	if err != nil {
+		return err
+	}
+	existingLayers := make(map[string]string)
+	for _, r := range c.layers {
+		existingLayers[r.id] = r.target
+	}
+
+	currentLayers := make(map[string]string)
+	for _, r := range allLayers {
+		currentLayers[r.ID] = r.ID
+		if _, found := existingLayers[r.ID]; found {
+			continue
+		}
+
+		bigData, err := c.store.LayerBigData(r.ID, cacheKey)
+		if err != nil {
+			if errors.Cause(err) == os.ErrNotExist {
+				continue
+			}
+			return err
+		}
+		defer bigData.Close()
+
+		metadata, err := readMetadataFromCache(bigData)
+		if err != nil {
+			logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
+		}
+
+		if metadata != nil {
+			c.addLayer(r.ID, metadata)
+			continue
+		}
+
+		manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
+		if err != nil {
+			continue
+		}
+		defer manifestReader.Close()
+		manifest, err := ioutil.ReadAll(manifestReader)
+		if err != nil {
+			return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
+		}
+
+		metadata, err = writeCache(manifest, r.ID, c.store)
+		if err == nil {
+			c.addLayer(r.ID, metadata)
+		}
+	}
+
+	var newLayers []layer
+	for _, l := range c.layers {
+		if _, found := currentLayers[l.id]; found {
+			newLayers = append(newLayers, l)
+		}
+	}
+	c.layers = newLayers
+
+	return nil
+}
+
+// calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
+// is usable for deduplication with hardlinks.
+// To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
+func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
+	digester := digest.Canonical.Digester()
+
+	modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
+	hash := digester.Hash()
+
+	if _, err := hash.Write([]byte(f.Digest)); err != nil {
+		return "", err
+	}
+
+	if _, err := hash.Write([]byte(modeString)); err != nil {
+		return "", err
+	}
+
+	if len(f.Xattrs) > 0 {
+		keys := make([]string, 0, len(f.Xattrs))
+		for k := range f.Xattrs {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+
+		for _, k := range keys {
+			if _, err := hash.Write([]byte(k)); err != nil {
+				return "", err
+			}
+			if _, err := hash.Write([]byte(f.Xattrs[k])); err != nil {
+				return "", err
+			}
+		}
+	}
+	return string(digester.Digest()), nil
+}
+
+// generateFileLocation generates a file location in the form $OFFSET@$PATH
+func generateFileLocation(path string, offset uint64) []byte {
+	return []byte(fmt.Sprintf("%d@%s", offset, path))
+}
+
+// generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
+// the [OFFSET; LEN] points to the variable length data where the file locations
+// are stored.  $DIGEST has length digestLen stored in the metadata file header.
+func generateTag(digest string, offset, len uint64) string {
+	return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
+}
+
+type setBigData interface {
+	// SetLayerBigData stores a (possibly large) chunk of named data
+	SetLayerBigData(id, key string, data io.Reader) error
+}
+
+// writeCache write a cache for the layer ID.
+// It generates a sorted list of digests with their offset to the path location and offset.
+// The same cache is used to lookup files, chunks and candidates for deduplication with hard links.
+// There are 3 kind of digests stored:
+// - digest(file.payload))
+// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
+// - digest(i) for each i in chunks(file payload)
+func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
+	var vdata bytes.Buffer
+	tagLen := 0
+	digestLen := 0
+	var tagsBuffer bytes.Buffer
+
+	toc, err := prepareMetadata(manifest)
+	if err != nil {
+		return nil, err
+	}
+
+	var tags []string
+	for _, k := range toc {
+		if k.Digest != "" {
+			location := generateFileLocation(k.Name, 0)
+
+			off := uint64(vdata.Len())
+			l := uint64(len(location))
+
+			d := generateTag(k.Digest, off, l)
+			if tagLen == 0 {
+				tagLen = len(d)
+			}
+			if tagLen != len(d) {
+				return nil, errors.New("digest with different length found")
+			}
+			tags = append(tags, d)
+
+			fp, err := calculateHardLinkFingerprint(k)
+			if err != nil {
+				return nil, err
+			}
+			d = generateTag(fp, off, l)
+			if tagLen != len(d) {
+				return nil, errors.New("digest with different length found")
+			}
+			tags = append(tags, d)
+
+			if _, err := vdata.Write(location); err != nil {
+				return nil, err
+			}
+
+			digestLen = len(k.Digest)
+		}
+		if k.ChunkDigest != "" {
+			location := generateFileLocation(k.Name, uint64(k.ChunkOffset))
+			off := uint64(vdata.Len())
+			l := uint64(len(location))
+			d := generateTag(k.ChunkDigest, off, l)
+			if tagLen == 0 {
+				tagLen = len(d)
+			}
+			if tagLen != len(d) {
+				return nil, errors.New("digest with different length found")
+			}
+			tags = append(tags, d)
+
+			if _, err := vdata.Write(location); err != nil {
+				return nil, err
+			}
+			digestLen = len(k.ChunkDigest)
+		}
+	}
+
+	sort.Strings(tags)
+
+	for _, t := range tags {
+		if _, err := tagsBuffer.Write([]byte(t)); err != nil {
+			return nil, err
+		}
+	}
+
+	pipeReader, pipeWriter := io.Pipe()
+	errChan := make(chan error, 1)
+	go func() {
+		defer pipeWriter.Close()
+		defer close(errChan)
+
+		// version
+		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil {
+			errChan <- err
+			return
+		}
+
+		// len of a tag
+		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil {
+			errChan <- err
+			return
+		}
+
+		// len of a digest
+		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil {
+			errChan <- err
+			return
+		}
+
+		// tags length
+		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
+			errChan <- err
+			return
+		}
+
+		// vdata length
+		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil {
+			errChan <- err
+			return
+		}
+
+		// tags
+		if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil {
+			errChan <- err
+			return
+		}
+
+		// variable length data
+		if _, err := pipeWriter.Write(vdata.Bytes()); err != nil {
+			errChan <- err
+			return
+		}
+
+		errChan <- nil
+	}()
+	defer pipeReader.Close()
+
+	counter := ioutils.NewWriteCounter(ioutil.Discard)
+
+	r := io.TeeReader(pipeReader, counter)
+
+	if err := dest.SetLayerBigData(id, cacheKey, r); err != nil {
+		return nil, err
+	}
+
+	if err := <-errChan; err != nil {
+		return nil, err
+	}
+
+	logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
+
+	return &metadata{
+		digestLen: digestLen,
+		tagLen:    tagLen,
+		tags:      tagsBuffer.Bytes(),
+		vdata:     vdata.Bytes(),
+	}, nil
+}
+
+func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
+	var version, tagLen, digestLen, tagsLen, vdataLen uint64
+	if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
+		return nil, err
+	}
+	if version != cacheVersion {
+		return nil, nil
+	}
+	if err := binary.Read(bigData, binary.LittleEndian, &tagLen); err != nil {
+		return nil, err
+	}
+	if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil {
+		return nil, err
+	}
+	if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil {
+		return nil, err
+	}
+	if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil {
+		return nil, err
+	}
+
+	tags := make([]byte, tagsLen)
+	if _, err := bigData.Read(tags); err != nil {
+		return nil, err
+	}
+
+	vdata := make([]byte, vdataLen)
+	if _, err := bigData.Read(vdata); err != nil {
+		return nil, err
+	}
+
+	return &metadata{
+		tagLen:    int(tagLen),
+		digestLen: int(digestLen),
+		tags:      tags,
+		vdata:     vdata,
+	}, nil
+}
+
+func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
+	toc, err := unmarshalToc(manifest)
+	if err != nil {
+		// ignore errors here.  They might be caused by a different manifest format.
+		return nil, nil
+	}
+
+	var r []*internal.FileMetadata
+	chunkSeen := make(map[string]bool)
+	for i := range toc.Entries {
+		d := toc.Entries[i].Digest
+		if d != "" {
+			r = append(r, &toc.Entries[i])
+			continue
+		}
+
+		// chunks do not use hard link dedup so keeping just one candidate is enough
+		cd := toc.Entries[i].ChunkDigest
+		if cd != "" && !chunkSeen[cd] {
+			r = append(r, &toc.Entries[i])
+			chunkSeen[cd] = true
+		}
+	}
+	return r, nil
+}
+
+func (c *layersCache) addLayer(id string, metadata *metadata) error {
+	target, err := c.store.DifferTarget(id)
+	if err != nil {
+		return fmt.Errorf("get checkout directory layer %q: %w", id, err)
+	}
+
+	l := layer{
+		id:       id,
+		metadata: metadata,
+		target:   target,
+	}
+	c.layers = append(c.layers, l)
+	return nil
+}
+
+func byteSliceAsString(b []byte) string {
+	return *(*string)(unsafe.Pointer(&b))
+}
+
+func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
+	if len(digest) != metadata.digestLen {
+		return "", 0, 0
+	}
+
+	nElements := len(metadata.tags) / metadata.tagLen
+
+	i := sort.Search(nElements, func(i int) bool {
+		d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen])
+		return strings.Compare(d, digest) >= 0
+	})
+	if i < nElements {
+		d := string(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+len(digest)])
+		if digest == d {
+			startOff := i*metadata.tagLen + metadata.digestLen
+			parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
+			off, _ := strconv.ParseInt(parts[0], 10, 64)
+			len, _ := strconv.ParseInt(parts[1], 10, 64)
+			return digest, uint64(off), uint64(len)
+		}
+	}
+	return "", 0, 0
+}
+
+func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) {
+	if digest == "" {
+		return "", "", -1, nil
+	}
+
+	c.mutex.RLock()
+	defer c.mutex.RUnlock()
+
+	for _, layer := range c.layers {
+		digest, off, len := findTag(digest, layer.metadata)
+		if digest != "" {
+			position := string(layer.metadata.vdata[off : off+len])
+			parts := strings.SplitN(position, "@", 2)
+			offFile, _ := strconv.ParseInt(parts[0], 10, 64)
+			return layer.target, parts[1], offFile, nil
+		}
+	}
+
+	return "", "", -1, nil
+}
+
+// findFileInOtherLayers finds the specified file in other layers.
+// file is the file to look for.
+func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) {
+	digest := file.Digest
+	if useHardLinks {
+		var err error
+		digest, err = calculateHardLinkFingerprint(file)
+		if err != nil {
+			return "", "", err
+		}
+	}
+	target, name, off, err := c.findDigestInternal(digest)
+	if off == 0 {
+		return target, name, err
+	}
+	return "", "", nil
+}
+
+func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (string, string, int64, error) {
+	return c.findDigestInternal(chunk.ChunkDigest)
+}
+
+func unmarshalToc(manifest []byte) (*internal.TOC, error) {
+	var buf bytes.Buffer
+	count := 0
+	var toc internal.TOC
+
+	iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
+	for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+		if field != "entries" {
+			iter.Skip()
+			continue
+		}
+		for iter.ReadArray() {
+			for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+				switch field {
+				case "type", "name", "linkName", "digest", "chunkDigest", "chunkType":
+					count += len(iter.ReadStringAsSlice())
+				case "xattrs":
+					for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
+						count += len(iter.ReadStringAsSlice())
+					}
+				default:
+					iter.Skip()
+				}
+			}
+		}
+		break
+	}
+
+	buf.Grow(count)
+
+	getString := func(b []byte) string {
+		from := buf.Len()
+		buf.Write(b)
+		to := buf.Len()
+		return byteSliceAsString(buf.Bytes()[from:to])
+	}
+
+	iter = jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
+	for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+		if field == "version" {
+			toc.Version = iter.ReadInt()
+			continue
+		}
+		if field != "entries" {
+			iter.Skip()
+			continue
+		}
+		for iter.ReadArray() {
+			var m internal.FileMetadata
+			for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+				switch field {
+				case "type":
+					m.Type = getString(iter.ReadStringAsSlice())
+				case "name":
+					m.Name = getString(iter.ReadStringAsSlice())
+				case "linkName":
+					m.Linkname = getString(iter.ReadStringAsSlice())
+				case "mode":
+					m.Mode = iter.ReadInt64()
+				case "size":
+					m.Size = iter.ReadInt64()
+				case "UID":
+					m.UID = iter.ReadInt()
+				case "GID":
+					m.GID = iter.ReadInt()
+				case "ModTime":
+					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					if err != nil {
+						return nil, err
+					}
+					m.ModTime = &time
+				case "accesstime":
+					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					if err != nil {
+						return nil, err
+					}
+					m.AccessTime = &time
+				case "changetime":
+					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					if err != nil {
+						return nil, err
+					}
+					m.ChangeTime = &time
+				case "devMajor":
+					m.Devmajor = iter.ReadInt64()
+				case "devMinor":
+					m.Devminor = iter.ReadInt64()
+				case "digest":
+					m.Digest = getString(iter.ReadStringAsSlice())
+				case "offset":
+					m.Offset = iter.ReadInt64()
+				case "endOffset":
+					m.EndOffset = iter.ReadInt64()
+				case "chunkSize":
+					m.ChunkSize = iter.ReadInt64()
+				case "chunkOffset":
+					m.ChunkOffset = iter.ReadInt64()
+				case "chunkDigest":
+					m.ChunkDigest = getString(iter.ReadStringAsSlice())
+				case "chunkType":
+					m.ChunkType = getString(iter.ReadStringAsSlice())
+				case "xattrs":
+					m.Xattrs = make(map[string]string)
+					for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
+						value := iter.ReadStringAsSlice()
+						m.Xattrs[key] = getString(value)
+					}
+				default:
+					iter.Skip()
+				}
+			}
+			toc.Entries = append(toc.Entries, m)
+		}
+		break
+	}
+	toc.StringsBuf = buf
+	return &toc, nil
+}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
index 092cf584a..aeb7cfd4f 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
@@ -5,6 +5,7 @@ package compressor
 // larger software like the graph drivers.
 
 import (
+	"bufio"
 	"encoding/base64"
 	"io"
 	"io/ioutil"
@@ -15,6 +16,189 @@ import (
 	"github.com/vbatts/tar-split/archive/tar"
 )
 
+const RollsumBits = 16
+const holesThreshold = int64(1 << 10)
+
+type holesFinder struct {
+	reader    *bufio.Reader
+	fileOff   int64
+	zeros     int64
+	from      int64
+	threshold int64
+
+	state int
+}
+
+const (
+	holesFinderStateRead = iota
+	holesFinderStateAccumulate
+	holesFinderStateFound
+	holesFinderStateEOF
+)
+
+// ReadByte reads a single byte from the underlying reader.
+// If a single byte is read, the return value is (0, RAW-BYTE-VALUE, nil).
+// If there are at least f.THRESHOLD consecutive zeros, then the
+// return value is (N_CONSECUTIVE_ZEROS, '\x00').
+func (f *holesFinder) ReadByte() (int64, byte, error) {
+	for {
+		switch f.state {
+		// reading the file stream
+		case holesFinderStateRead:
+			if f.zeros > 0 {
+				f.zeros--
+				return 0, 0, nil
+			}
+			b, err := f.reader.ReadByte()
+			if err != nil {
+				return 0, b, err
+			}
+
+			if b != 0 {
+				return 0, b, err
+			}
+
+			f.zeros = 1
+			if f.zeros == f.threshold {
+				f.state = holesFinderStateFound
+			} else {
+				f.state = holesFinderStateAccumulate
+			}
+		// accumulating zeros, but still didn't reach the threshold
+		case holesFinderStateAccumulate:
+			b, err := f.reader.ReadByte()
+			if err != nil {
+				if err == io.EOF {
+					f.state = holesFinderStateEOF
+					continue
+				}
+				return 0, b, err
+			}
+
+			if b == 0 {
+				f.zeros++
+				if f.zeros == f.threshold {
+					f.state = holesFinderStateFound
+				}
+			} else {
+				if f.reader.UnreadByte(); err != nil {
+					return 0, 0, err
+				}
+				f.state = holesFinderStateRead
+			}
+		// found a hole.  Number of zeros >= threshold
+		case holesFinderStateFound:
+			b, err := f.reader.ReadByte()
+			if err != nil {
+				if err == io.EOF {
+					f.state = holesFinderStateEOF
+				}
+				holeLen := f.zeros
+				f.zeros = 0
+				return holeLen, 0, nil
+			}
+			if b != 0 {
+				if f.reader.UnreadByte(); err != nil {
+					return 0, 0, err
+				}
+				f.state = holesFinderStateRead
+
+				holeLen := f.zeros
+				f.zeros = 0
+				return holeLen, 0, nil
+			}
+			f.zeros++
+		// reached EOF.  Flush pending zeros if any.
+		case holesFinderStateEOF:
+			if f.zeros > 0 {
+				f.zeros--
+				return 0, 0, nil
+			}
+			return 0, 0, io.EOF
+		}
+	}
+}
+
+type rollingChecksumReader struct {
+	reader      *holesFinder
+	closed      bool
+	rollsum     *RollSum
+	pendingHole int64
+
+	// WrittenOut is the total number of bytes read from
+	// the stream.
+	WrittenOut int64
+
+	// IsLastChunkZeros tells whether the last generated
+	// chunk is a hole (made of consecutive zeros).  If it
+	// is false, then the last chunk is a data chunk
+	// generated by the rolling checksum.
+	IsLastChunkZeros bool
+}
+
+func (rc *rollingChecksumReader) Read(b []byte) (bool, int, error) {
+	rc.IsLastChunkZeros = false
+
+	if rc.pendingHole > 0 {
+		toCopy := int64(len(b))
+		if rc.pendingHole < toCopy {
+			toCopy = rc.pendingHole
+		}
+		rc.pendingHole -= toCopy
+		for i := int64(0); i < toCopy; i++ {
+			b[i] = 0
+		}
+
+		rc.WrittenOut += toCopy
+
+		rc.IsLastChunkZeros = true
+
+		// if there are no other zeros left, terminate the chunk
+		return rc.pendingHole == 0, int(toCopy), nil
+	}
+
+	if rc.closed {
+		return false, 0, io.EOF
+	}
+
+	for i := 0; i < len(b); i++ {
+		holeLen, n, err := rc.reader.ReadByte()
+		if err != nil {
+			if err == io.EOF {
+				rc.closed = true
+				if i == 0 {
+					return false, 0, err
+				}
+				return false, i, nil
+			}
+			// Report any other error type
+			return false, -1, err
+		}
+		if holeLen > 0 {
+			for j := int64(0); j < holeLen; j++ {
+				rc.rollsum.Roll(0)
+			}
+			rc.pendingHole = holeLen
+			return true, i, nil
+		}
+		b[i] = n
+		rc.WrittenOut++
+		rc.rollsum.Roll(n)
+		if rc.rollsum.OnSplitWithBits(RollsumBits) {
+			return true, i + 1, nil
+		}
+	}
+	return false, len(b), nil
+}
+
+type chunk struct {
+	ChunkOffset int64
+	Offset      int64
+	Checksum    string
+	ChunkSize   int64
+	ChunkType   string
+}
+
 func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
 	// total written so far.  Used to retrieve partial offsets in the file
 	dest := ioutils.NewWriteCounter(destFile)
@@ -64,40 +248,78 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
 		if _, err := zstdWriter.Write(rawBytes); err != nil {
 			return err
 		}
-		payloadDigester := digest.Canonical.Digester()
-		payloadChecksum := payloadDigester.Hash()
 
-		payloadDest := io.MultiWriter(payloadChecksum, zstdWriter)
+		payloadDigester := digest.Canonical.Digester()
+		chunkDigester := digest.Canonical.Digester()
 
 		// Now handle the payload, if any
-		var startOffset, endOffset int64
+		startOffset := int64(0)
+		lastOffset := int64(0)
+		lastChunkOffset := int64(0)
+
 		checksum := ""
+
+		chunks := []chunk{}
+
+		hf := &holesFinder{
+			threshold: holesThreshold,
+			reader:    bufio.NewReader(tr),
+		}
+
+		rcReader := &rollingChecksumReader{
+			reader:  hf,
+			rollsum: NewRollSum(),
+		}
+
+		payloadDest := io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
 		for {
-			read, errRead := tr.Read(buf)
+			mustSplit, read, errRead := rcReader.Read(buf)
 			if errRead != nil && errRead != io.EOF {
 				return err
 			}
-
-			// restart the compression only if there is
-			// a payload.
+			// restart the compression only if there is a payload.
 			if read > 0 {
 				if startOffset == 0 {
 					startOffset, err = restartCompression()
 					if err != nil {
 						return err
 					}
+					lastOffset = startOffset
+				}
+
+				if _, err := payloadDest.Write(buf[:read]); err != nil {
+					return err
 				}
-				_, err := payloadDest.Write(buf[:read])
+			}
+			if (mustSplit || errRead == io.EOF) && startOffset > 0 {
+				off, err := restartCompression()
 				if err != nil {
 					return err
 				}
+
+				chunkSize := rcReader.WrittenOut - lastChunkOffset
+				if chunkSize > 0 {
+					chunkType := internal.ChunkTypeData
+					if rcReader.IsLastChunkZeros {
+						chunkType = internal.ChunkTypeZeros
+					}
+
+					chunks = append(chunks, chunk{
+						ChunkOffset: lastChunkOffset,
+						Offset:      lastOffset,
+						Checksum:    chunkDigester.Digest().String(),
+						ChunkSize:   chunkSize,
+						ChunkType:   chunkType,
+					})
+				}
+
+				lastOffset = off
+				lastChunkOffset = rcReader.WrittenOut
+				chunkDigester = digest.Canonical.Digester()
+				payloadDest = io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
 			}
 			if errRead == io.EOF {
 				if startOffset > 0 {
-					endOffset, err = restartCompression()
-					if err != nil {
-						return err
-					}
 					checksum = payloadDigester.Digest().String()
 				}
 				break
@@ -112,30 +334,42 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
 		for k, v := range hdr.Xattrs {
 			xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
 		}
-		m := internal.FileMetadata{
-			Type:       typ,
-			Name:       hdr.Name,
-			Linkname:   hdr.Linkname,
-			Mode:       hdr.Mode,
-			Size:       hdr.Size,
-			UID:        hdr.Uid,
-			GID:        hdr.Gid,
-			ModTime:    hdr.ModTime,
-			AccessTime: hdr.AccessTime,
-			ChangeTime: hdr.ChangeTime,
-			Devmajor:   hdr.Devmajor,
-			Devminor:   hdr.Devminor,
-			Xattrs:     xattrs,
-			Digest:     checksum,
-			Offset:     startOffset,
-			EndOffset:  endOffset,
-
-			// ChunkSize is 0 for the last chunk
-			ChunkSize:   0,
-			ChunkOffset: 0,
-			ChunkDigest: checksum,
-		}
-		metadata = append(metadata, m)
+		entries := []internal.FileMetadata{
+			{
+				Type:       typ,
+				Name:       hdr.Name,
+				Linkname:   hdr.Linkname,
+				Mode:       hdr.Mode,
+				Size:       hdr.Size,
+				UID:        hdr.Uid,
+				GID:        hdr.Gid,
+				ModTime:    &hdr.ModTime,
+				AccessTime: &hdr.AccessTime,
+				ChangeTime: &hdr.ChangeTime,
+				Devmajor:   hdr.Devmajor,
+				Devminor:   hdr.Devminor,
+				Xattrs:     xattrs,
+				Digest:     checksum,
+				Offset:     startOffset,
+				EndOffset:  lastOffset,
+			},
+		}
+		for i := 1; i < len(chunks); i++ {
+			entries = append(entries, internal.FileMetadata{
+				Type:        internal.TypeChunk,
+				Name:        hdr.Name,
+				ChunkOffset: chunks[i].ChunkOffset,
+			})
+		}
+		if len(chunks) > 1 {
+			for i := range chunks {
+				entries[i].ChunkSize = chunks[i].ChunkSize
+				entries[i].Offset = chunks[i].Offset
+				entries[i].ChunkDigest = chunks[i].Checksum
+				entries[i].ChunkType = chunks[i].ChunkType
+			}
+		}
+		metadata = append(metadata, entries...)
 	}
 
 	rawBytes := tr.RawBytes()
@@ -212,7 +446,7 @@ func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level
 // ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
 func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) {
 	if level == nil {
-		l := 3
+		l := 10
 		level = &l
 	}
 
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go
new file mode 100644
index 000000000..f4dfad822
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go
@@ -0,0 +1,81 @@
+/*
+Copyright 2011 The Perkeep Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package rollsum implements rolling checksums similar to apenwarr's bup, which
+// is similar to librsync.
+//
+// The bup project is at https://github.com/apenwarr/bup and its splitting in
+// particular is at https://github.com/apenwarr/bup/blob/master/lib/bup/bupsplit.c
+package compressor
+
+import (
+	"math/bits"
+)
+
+const windowSize = 64 // Roll assumes windowSize is a power of 2
+const charOffset = 31
+
+const blobBits = 13
+const blobSize = 1 << blobBits // 8k
+
+type RollSum struct {
+	s1, s2 uint32
+	window [windowSize]uint8
+	wofs   int
+}
+
+func NewRollSum() *RollSum {
+	return &RollSum{
+		s1: windowSize * charOffset,
+		s2: windowSize * (windowSize - 1) * charOffset,
+	}
+}
+
+func (rs *RollSum) add(drop, add uint32) {
+	s1 := rs.s1 + add - drop
+	rs.s1 = s1
+	rs.s2 += s1 - uint32(windowSize)*(drop+charOffset)
+}
+
+// Roll adds ch to the rolling sum.
+func (rs *RollSum) Roll(ch byte) {
+	wp := &rs.window[rs.wofs]
+	rs.add(uint32(*wp), uint32(ch))
+	*wp = ch
+	rs.wofs = (rs.wofs + 1) & (windowSize - 1)
+}
+
+// OnSplit reports whether at least 13 consecutive trailing bits of
+// the current checksum are set the same way.
+func (rs *RollSum) OnSplit() bool {
+	return (rs.s2 & (blobSize - 1)) == ((^0) & (blobSize - 1))
+}
+
+// OnSplitWithBits reports whether at least n consecutive trailing bits
+// of the current checksum are set the same way.
+func (rs *RollSum) OnSplitWithBits(n uint32) bool {
+	mask := (uint32(1) << n) - 1
+	return rs.s2&mask == (^uint32(0))&mask
+}
+
+func (rs *RollSum) Bits() int {
+	rsum := rs.Digest() >> (blobBits + 1)
+	return blobBits + bits.TrailingZeros32(^rsum)
+}
+
+func (rs *RollSum) Digest() uint32 {
+	return (rs.s1 << 16) | (rs.s2 & 0xffff)
+}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
index c91c43d85..3bb5286d9 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
@@ -8,11 +8,11 @@ import (
 	"archive/tar"
 	"bytes"
 	"encoding/binary"
-	"encoding/json"
 	"fmt"
 	"io"
 	"time"
 
+	jsoniter "github.com/json-iterator/go"
 	"github.com/klauspost/compress/zstd"
 	"github.com/opencontainers/go-digest"
 )
@@ -20,6 +20,9 @@ import (
 type TOC struct {
 	Version int            `json:"version"`
 	Entries []FileMetadata `json:"entries"`
+
+	// internal: used by unmarshalToc
+	StringsBuf bytes.Buffer `json:"-"`
 }
 
 type FileMetadata struct {
@@ -27,26 +30,34 @@ type FileMetadata struct {
 	Name       string            `json:"name"`
 	Linkname   string            `json:"linkName,omitempty"`
 	Mode       int64             `json:"mode,omitempty"`
-	Size       int64             `json:"size"`
-	UID        int               `json:"uid"`
-	GID        int               `json:"gid"`
-	ModTime    time.Time         `json:"modtime"`
-	AccessTime time.Time         `json:"accesstime"`
-	ChangeTime time.Time         `json:"changetime"`
-	Devmajor   int64             `json:"devMajor"`
-	Devminor   int64             `json:"devMinor"`
+	Size       int64             `json:"size,omitempty"`
+	UID        int               `json:"uid,omitempty"`
+	GID        int               `json:"gid,omitempty"`
+	ModTime    *time.Time        `json:"modtime,omitempty"`
+	AccessTime *time.Time        `json:"accesstime,omitempty"`
+	ChangeTime *time.Time        `json:"changetime,omitempty"`
+	Devmajor   int64             `json:"devMajor,omitempty"`
+	Devminor   int64             `json:"devMinor,omitempty"`
 	Xattrs     map[string]string `json:"xattrs,omitempty"`
 	Digest     string            `json:"digest,omitempty"`
 	Offset     int64             `json:"offset,omitempty"`
 	EndOffset  int64             `json:"endOffset,omitempty"`
 
-	// Currently chunking is not supported.
 	ChunkSize   int64  `json:"chunkSize,omitempty"`
 	ChunkOffset int64  `json:"chunkOffset,omitempty"`
 	ChunkDigest string `json:"chunkDigest,omitempty"`
+	ChunkType   string `json:"chunkType,omitempty"`
+
+	// internal: computed by mergeTOCEntries.
+	Chunks []*FileMetadata `json:"-"`
 }
 
 const (
+	ChunkTypeData  = ""
+	ChunkTypeZeros = "zeros"
+)
+
+const (
 	TypeReg     = "reg"
 	TypeChunk   = "chunk"
 	TypeLink    = "hardlink"
@@ -123,6 +134,7 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
 		Entries: metadata,
 	}
 
+	var json = jsoniter.ConfigCompatibleWithStandardLibrary
 	// Generate the manifest
 	manifest, err := json.Marshal(toc)
 	if err != nil {
diff --git a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
index 52d21d689..92b15c2bf 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
@@ -4,8 +4,8 @@ import (
 	archivetar "archive/tar"
 	"context"
 	"encoding/base64"
-	"encoding/json"
 	"fmt"
+	"hash"
 	"io"
 	"io/ioutil"
 	"os"
@@ -13,6 +13,7 @@ import (
 	"reflect"
 	"sort"
 	"strings"
+	"sync"
 	"sync/atomic"
 	"syscall"
 	"time"
@@ -43,28 +44,35 @@ const (
 	bigDataKey              = "zstd-chunked-manifest"
 
 	fileTypeZstdChunked = iota
-	fileTypeEstargz     = iota
+	fileTypeEstargz
+	fileTypeNoCompression
+	fileTypeHole
+
+	copyGoRoutines = 32
 )
 
 type compressedFileType int
 
 type chunkedDiffer struct {
-	stream         ImageSourceSeekable
-	manifest       []byte
-	layersMetadata map[string][]internal.FileMetadata
-	layersTarget   map[string]string
-	tocOffset      int64
-	fileType       compressedFileType
+	stream      ImageSourceSeekable
+	manifest    []byte
+	layersCache *layersCache
+	tocOffset   int64
+	fileType    compressedFileType
+
+	copyBuffer []byte
 
 	gzipReader *pgzip.Reader
+	zstdReader *zstd.Decoder
+	rawReader  io.Reader
 }
 
 var xattrsToIgnore = map[string]interface{}{
 	"security.selinux": true,
 }
 
-func timeToTimespec(time time.Time) (ts unix.Timespec) {
-	if time.IsZero() {
+func timeToTimespec(time *time.Time) (ts unix.Timespec) {
+	if time == nil || time.IsZero() {
 		// Return UTIME_OMIT special value
 		ts.Sec = 0
 		ts.Nsec = ((1 << 30) - 2)
@@ -128,54 +136,6 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
 	return dstFile, st.Size(), nil
 }
 
-func prepareOtherLayersCache(layersMetadata map[string][]internal.FileMetadata) map[string]map[string][]*internal.FileMetadata {
-	maps := make(map[string]map[string][]*internal.FileMetadata)
-
-	for layerID, v := range layersMetadata {
-		r := make(map[string][]*internal.FileMetadata)
-		for i := range v {
-			if v[i].Digest != "" {
-				r[v[i].Digest] = append(r[v[i].Digest], &v[i])
-			}
-		}
-		maps[layerID] = r
-	}
-	return maps
-}
-
-func getLayersCache(store storage.Store) (map[string][]internal.FileMetadata, map[string]string, error) {
-	allLayers, err := store.Layers()
-	if err != nil {
-		return nil, nil, err
-	}
-
-	layersMetadata := make(map[string][]internal.FileMetadata)
-	layersTarget := make(map[string]string)
-	for _, r := range allLayers {
-		manifestReader, err := store.LayerBigData(r.ID, bigDataKey)
-		if err != nil {
-			continue
-		}
-		defer manifestReader.Close()
-		manifest, err := ioutil.ReadAll(manifestReader)
-		if err != nil {
-			return nil, nil, fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
-		}
-		var toc internal.TOC
-		if err := json.Unmarshal(manifest, &toc); err != nil {
-			continue
-		}
-		layersMetadata[r.ID] = toc.Entries
-		target, err := store.DifferTarget(r.ID)
-		if err != nil {
-			return nil, nil, fmt.Errorf("get checkout directory layer %q: %w", r.ID, err)
-		}
-		layersTarget[r.ID] = target
-	}
-
-	return layersMetadata, layersTarget, nil
-}
-
 // GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
 func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
 	if _, ok := annotations[internal.ManifestChecksumKey]; ok {
@@ -192,18 +152,18 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
-	layersMetadata, layersTarget, err := getLayersCache(store)
+	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
 	}
 
 	return &chunkedDiffer{
-		stream:         iss,
-		manifest:       manifest,
-		layersMetadata: layersMetadata,
-		layersTarget:   layersTarget,
-		tocOffset:      tocOffset,
-		fileType:       fileTypeZstdChunked,
+		copyBuffer:  makeCopyBuffer(),
+		stream:      iss,
+		manifest:    manifest,
+		layersCache: layersCache,
+		tocOffset:   tocOffset,
+		fileType:    fileTypeZstdChunked,
 	}, nil
 }
 
@@ -212,37 +172,41 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
-	layersMetadata, layersTarget, err := getLayersCache(store)
+	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
 	}
 
 	return &chunkedDiffer{
-		stream:         iss,
-		manifest:       manifest,
-		layersMetadata: layersMetadata,
-		layersTarget:   layersTarget,
-		tocOffset:      tocOffset,
-		fileType:       fileTypeEstargz,
+		copyBuffer:  makeCopyBuffer(),
+		stream:      iss,
+		manifest:    manifest,
+		layersCache: layersCache,
+		tocOffset:   tocOffset,
+		fileType:    fileTypeEstargz,
 	}, nil
 }
 
+func makeCopyBuffer() []byte {
+	return make([]byte, 2<<20)
+}
+
 // copyFileFromOtherLayer copies a file from another layer
 // file is the file to look for.
 // source is the path to the source layer checkout.
-// otherFile contains the metadata for the file.
+// name is the path to the file to copy in source.
 // dirfd is an open file descriptor to the destination root directory.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
 	if err != nil {
-		return false, nil, 0, fmt.Errorf("open source file %q: %w", source, err)
+		return false, nil, 0, fmt.Errorf("open source file: %w", err)
 	}
 	defer unix.Close(srcDirfd)
 
-	srcFile, err := openFileUnderRoot(otherFile.Name, srcDirfd, unix.O_RDONLY, 0)
+	srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
 	if err != nil {
-		return false, nil, 0, fmt.Errorf("open source file %q under target rootfs: %w", otherFile.Name, err)
+		return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err)
 	}
 	defer srcFile.Close()
 
@@ -308,45 +272,9 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
 	return canDedupMetadataWithHardLink(file, &otherFile)
 }
 
-// findFileInOtherLayers finds the specified file in other layers.
-// file is the file to look for.
-// dirfd is an open file descriptor to the checkout root directory.
-// layersMetadata contains the metadata for each layer in the storage.
-// layersTarget maps each layer to its checkout on disk.
-// useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOtherLayers(file *internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
-	// this is ugly, needs to be indexed
-	for layerID, checksums := range layersMetadata {
-		source, ok := layersTarget[layerID]
-		if !ok {
-			continue
-		}
-		files, found := checksums[file.Digest]
-		if !found {
-			continue
-		}
-		for _, candidate := range files {
-			// check if it is a valid candidate to dedup file
-			if useHardLinks && !canDedupMetadataWithHardLink(file, candidate) {
-				continue
-			}
-
-			found, dstFile, written, err := copyFileFromOtherLayer(file, source, candidate, dirfd, useHardLinks)
-			if found && err == nil {
-				return found, dstFile, written, err
-			}
-		}
-	}
-	// If hard links deduplication was used and it has failed, try again without hard links.
-	if useHardLinks {
-		return findFileInOtherLayers(file, dirfd, layersMetadata, layersTarget, false)
-	}
-	return false, nil, 0, nil
-}
-
-func getFileDigest(f *os.File) (digest.Digest, error) {
+func getFileDigest(f *os.File, buf []byte) (digest.Digest, error) {
 	digester := digest.Canonical.Digester()
-	if _, err := io.Copy(digester.Hash(), f); err != nil {
+	if _, err := io.CopyBuffer(digester.Hash(), f, buf); err != nil {
 		return "", err
 	}
 	return digester.Digest(), nil
@@ -408,7 +336,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
 // file is the file to look for.
 // dirfd is an open fd to the destination checkout.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool, buf []byte) (bool, *os.File, int64, error) {
 	sourceFile := filepath.Clean(filepath.Join("/", file.Name))
 	if !strings.HasPrefix(sourceFile, "/usr/") {
 		// limit host deduplication to files under /usr.
@@ -437,7 +365,7 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
 		return false, nil, 0, err
 	}
 
-	checksum, err := getFileDigest(f)
+	checksum, err := getFileDigest(f, buf)
 	if err != nil {
 		return false, nil, 0, err
 	}
@@ -459,7 +387,7 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
 		dstFile.Close()
 		return false, nil, 0, err
 	}
-	checksum, err = getFileDigest(f)
+	checksum, err = getFileDigest(f, buf)
 	if err != nil {
 		dstFile.Close()
 		return false, nil, 0, err
@@ -471,6 +399,19 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
 	return true, dstFile, written, nil
 }
 
+// findFileInOtherLayers finds the specified file in other layers.
+// cache is the layers cache to use.
+// file is the file to look for.
+// dirfd is an open file descriptor to the checkout root directory.
+// useHardLinks defines whether the deduplication can be performed using hard links.
+func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+	target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
+	if err != nil || name == "" {
+		return false, nil, 0, err
+	}
+	return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
+}
+
 func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
 	if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
 		return nil
@@ -497,18 +438,46 @@ func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOption
 	return nil
 }
 
-type missingFile struct {
-	File *internal.FileMetadata
+type originFile struct {
+	Root   string
+	Path   string
+	Offset int64
+}
+
+type missingFileChunk struct {
 	Gap  int64
+	Hole bool
+
+	File *internal.FileMetadata
+
+	CompressedSize   int64
+	UncompressedSize int64
 }
 
-func (m missingFile) Length() int64 {
-	return m.File.EndOffset - m.File.Offset
+type missingPart struct {
+	Hole        bool
+	SourceChunk *ImageSourceChunk
+	OriginFile  *originFile
+	Chunks      []missingFileChunk
 }
 
-type missingChunk struct {
-	RawChunk ImageSourceChunk
-	Files    []missingFile
+func (o *originFile) OpenFile() (io.ReadCloser, error) {
+	srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open source file: %w", err)
+	}
+	defer unix.Close(srcDirfd)
+
+	srcFile, err := openFileUnderRoot(o.Path, srcDirfd, unix.O_RDONLY, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open source file under target rootfs: %w", err)
+	}
+
+	if _, err := srcFile.Seek(o.Offset, 0); err != nil {
+		srcFile.Close()
+		return nil, err
+	}
+	return srcFile, nil
 }
 
 // setFileAttrs sets the file attributes for file given metadata
@@ -711,7 +680,7 @@ func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (
 			newDirfd, err2 := openOrCreateDirUnderRoot(parent, dirfd, 0)
 			if err2 == nil {
 				defer newDirfd.Close()
-				fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
+				fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
 				if err == nil {
 					return os.NewFile(uintptr(fd), name), nil
 				}
@@ -755,159 +724,367 @@ func openOrCreateDirUnderRoot(name string, dirfd int, mode os.FileMode) (*os.Fil
 	return nil, err
 }
 
-func (c *chunkedDiffer) createFileFromCompressedStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) (err error) {
-	file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
-	if err != nil {
-		return err
+func (c *chunkedDiffer) prepareCompressedStreamToFile(partCompression compressedFileType, from io.Reader, mf *missingFileChunk) (compressedFileType, error) {
+	switch {
+	case partCompression == fileTypeHole:
+		// The entire part is a hole.  Do not need to read from a file.
+		c.rawReader = nil
+		return fileTypeHole, nil
+	case mf.Hole:
+		// Only the missing chunk in the requested part refers to a hole.
+		// The received data must be discarded.
+		limitReader := io.LimitReader(from, mf.CompressedSize)
+		_, err := io.CopyBuffer(ioutil.Discard, limitReader, c.copyBuffer)
+		return fileTypeHole, err
+	case partCompression == fileTypeZstdChunked:
+		c.rawReader = io.LimitReader(from, mf.CompressedSize)
+		if c.zstdReader == nil {
+			r, err := zstd.NewReader(c.rawReader)
+			if err != nil {
+				return partCompression, err
+			}
+			c.zstdReader = r
+		} else {
+			if err := c.zstdReader.Reset(c.rawReader); err != nil {
+				return partCompression, err
+			}
+		}
+	case partCompression == fileTypeEstargz:
+		c.rawReader = io.LimitReader(from, mf.CompressedSize)
+		if c.gzipReader == nil {
+			r, err := pgzip.NewReader(c.rawReader)
+			if err != nil {
+				return partCompression, err
+			}
+			c.gzipReader = r
+		} else {
+			if err := c.gzipReader.Reset(c.rawReader); err != nil {
+				return partCompression, err
+			}
+		}
+	case partCompression == fileTypeNoCompression:
+		c.rawReader = io.LimitReader(from, mf.UncompressedSize)
+	default:
+		return partCompression, fmt.Errorf("unknown file type %q", c.fileType)
 	}
-	defer func() {
-		err2 := file.Close()
-		if err == nil {
-			err = err2
+	return partCompression, nil
+}
+
+// hashHole writes SIZE zeros to the specified hasher
+func hashHole(h hash.Hash, size int64, copyBuffer []byte) error {
+	count := int64(len(copyBuffer))
+	if size < count {
+		count = size
+	}
+	for i := int64(0); i < count; i++ {
+		copyBuffer[i] = 0
+	}
+	for size > 0 {
+		count = int64(len(copyBuffer))
+		if size < count {
+			count = size
 		}
-	}()
+		if _, err := h.Write(copyBuffer[:count]); err != nil {
+			return err
+		}
+		size -= count
+	}
+	return nil
+}
 
-	digester := digest.Canonical.Digester()
-	checksum := digester.Hash()
-	to := io.MultiWriter(file, checksum)
+// appendHole creates a hole with the specified size at the open fd.
+func appendHole(fd int, size int64) error {
+	off, err := unix.Seek(fd, size, unix.SEEK_CUR)
+	if err != nil {
+		return err
+	}
+	// Make sure the file size is changed.  It might be the last hole and no other data written afterwards.
+	if err := unix.Ftruncate(fd, off); err != nil {
+		return err
+	}
+	return nil
+}
 
-	switch c.fileType {
+func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileType, destFile *destinationFile, size int64) error {
+	switch compression {
 	case fileTypeZstdChunked:
-		z, err := zstd.NewReader(reader)
-		if err != nil {
+		defer c.zstdReader.Reset(nil)
+		if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.zstdReader, size), c.copyBuffer); err != nil {
 			return err
 		}
-		defer z.Close()
-
-		if _, err := io.Copy(to, io.LimitReader(z, metadata.Size)); err != nil {
+	case fileTypeEstargz:
+		defer c.gzipReader.Close()
+		if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.gzipReader, size), c.copyBuffer); err != nil {
 			return err
 		}
-		if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+	case fileTypeNoCompression:
+		if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.rawReader, size), c.copyBuffer); err != nil {
 			return err
 		}
-	case fileTypeEstargz:
-		if c.gzipReader == nil {
-			r, err := pgzip.NewReader(reader)
-			if err != nil {
-				return err
-			}
-			c.gzipReader = r
-		} else {
-			if err := c.gzipReader.Reset(reader); err != nil {
-				return err
-			}
-		}
-		defer c.gzipReader.Close()
-
-		if _, err := io.Copy(to, io.LimitReader(c.gzipReader, metadata.Size)); err != nil {
+	case fileTypeHole:
+		if err := appendHole(int(destFile.file.Fd()), size); err != nil {
 			return err
 		}
-		if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+		if err := hashHole(destFile.hash, size, c.copyBuffer); err != nil {
 			return err
 		}
 	default:
 		return fmt.Errorf("unknown file type %q", c.fileType)
 	}
+	return nil
+}
+
+type destinationFile struct {
+	dirfd    int
+	file     *os.File
+	digester digest.Digester
+	hash     hash.Hash
+	to       io.Writer
+	metadata *internal.FileMetadata
+	options  *archive.TarOptions
+}
+
+func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions) (*destinationFile, error) {
+	file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	digester := digest.Canonical.Digester()
+	hash := digester.Hash()
+	to := io.MultiWriter(file, hash)
+
+	return &destinationFile{
+		file:     file,
+		digester: digester,
+		hash:     hash,
+		to:       to,
+		metadata: metadata,
+		options:  options,
+		dirfd:    dirfd,
+	}, nil
+}
 
-	manifestChecksum, err := digest.Parse(metadata.Digest)
+func (d *destinationFile) Close() error {
+	manifestChecksum, err := digest.Parse(d.metadata.Digest)
 	if err != nil {
 		return err
 	}
-	if digester.Digest() != manifestChecksum {
-		return fmt.Errorf("checksum mismatch for %q", dest)
+	if d.digester.Digest() != manifestChecksum {
+		return fmt.Errorf("checksum mismatch for %q (got %q instead of %q)", d.file.Name(), d.digester.Digest(), manifestChecksum)
 	}
-	return setFileAttrs(dirfd, file, mode, metadata, options, false)
+
+	return setFileAttrs(d.dirfd, d.file, os.FileMode(d.metadata.Mode), d.metadata, d.options, false)
 }
 
-func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
-	for mc := 0; ; mc++ {
-		var part io.ReadCloser
-		select {
-		case p := <-streams:
-			part = p
-		case err := <-errs:
-			return err
-		}
-		if part == nil {
-			if mc == len(missingChunks) {
-				break
+func closeDestinationFiles(files chan *destinationFile, errors chan error) {
+	for f := range files {
+		errors <- f.Close()
+	}
+	close(errors)
+}
+
+func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) (Err error) {
+	var destFile *destinationFile
+
+	filesToClose := make(chan *destinationFile, 3)
+	closeFilesErrors := make(chan error, 2)
+
+	go closeDestinationFiles(filesToClose, closeFilesErrors)
+	defer func() {
+		close(filesToClose)
+		for e := range closeFilesErrors {
+			if e != nil && Err == nil {
+				Err = e
 			}
-			return errors.Errorf("invalid stream returned")
 		}
-		if mc == len(missingChunks) {
-			part.Close()
-			return errors.Errorf("too many chunks returned")
+	}()
+
+	for _, missingPart := range missingParts {
+		var part io.ReadCloser
+		partCompression := c.fileType
+		switch {
+		case missingPart.Hole:
+			partCompression = fileTypeHole
+		case missingPart.OriginFile != nil:
+			var err error
+			part, err = missingPart.OriginFile.OpenFile()
+			if err != nil {
+				return err
+			}
+			partCompression = fileTypeNoCompression
+		case missingPart.SourceChunk != nil:
+			select {
+			case p := <-streams:
+				part = p
+			case err := <-errs:
+				return err
+			}
+			if part == nil {
+				return errors.Errorf("invalid stream returned")
+			}
+		default:
+			return errors.Errorf("internal error: missing part misses both local and remote data stream")
 		}
 
-		for _, mf := range missingChunks[mc].Files {
+		for _, mf := range missingPart.Chunks {
 			if mf.Gap > 0 {
 				limitReader := io.LimitReader(part, mf.Gap)
-				_, err := io.Copy(ioutil.Discard, limitReader)
+				_, err := io.CopyBuffer(ioutil.Discard, limitReader, c.copyBuffer)
 				if err != nil {
-					part.Close()
-					return err
+					Err = err
+					goto exit
 				}
 				continue
 			}
 
-			limitReader := io.LimitReader(part, mf.Length())
+			if mf.File.Name == "" {
+				Err = errors.Errorf("file name empty")
+				goto exit
+			}
 
-			if err := c.createFileFromCompressedStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
-				part.Close()
-				return err
+			compression, err := c.prepareCompressedStreamToFile(partCompression, part, &mf)
+			if err != nil {
+				Err = err
+				goto exit
+			}
+
+			// Open the new file if it is different that what is already
+			// opened
+			if destFile == nil || destFile.metadata.Name != mf.File.Name {
+				var err error
+				if destFile != nil {
+				cleanup:
+					for {
+						select {
+						case err = <-closeFilesErrors:
+							if err != nil {
+								Err = err
+								goto exit
+							}
+						default:
+							break cleanup
+						}
+					}
+					filesToClose <- destFile
+				}
+				destFile, err = openDestinationFile(dirfd, mf.File, options)
+				if err != nil {
+					Err = err
+					goto exit
+				}
+			}
+
+			if err := c.appendCompressedStreamToFile(compression, destFile, mf.UncompressedSize); err != nil {
+				Err = err
+				goto exit
+			}
+			if c.rawReader != nil {
+				if _, err := io.CopyBuffer(ioutil.Discard, c.rawReader, c.copyBuffer); err != nil {
+					Err = err
+					goto exit
+				}
+			}
+		}
+	exit:
+		if part != nil {
+			part.Close()
+			if Err != nil {
+				break
 			}
 		}
-		part.Close()
 	}
+
+	if destFile != nil {
+		return destFile.Close()
+	}
+
 	return nil
 }
 
-func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk {
-	if len(missingChunks) <= target {
-		return missingChunks
+func mergeMissingChunks(missingParts []missingPart, target int) []missingPart {
+	getGap := func(missingParts []missingPart, i int) int {
+		prev := missingParts[i-1].SourceChunk.Offset + missingParts[i-1].SourceChunk.Length
+		return int(missingParts[i].SourceChunk.Offset - prev)
+	}
+	getCost := func(missingParts []missingPart, i int) int {
+		cost := getGap(missingParts, i)
+		if missingParts[i-1].OriginFile != nil {
+			cost += int(missingParts[i-1].SourceChunk.Length)
+		}
+		if missingParts[i].OriginFile != nil {
+			cost += int(missingParts[i].SourceChunk.Length)
+		}
+		return cost
+	}
+
+	// simple case: merge chunks from the same file.
+	newMissingParts := missingParts[0:1]
+	prevIndex := 0
+	for i := 1; i < len(missingParts); i++ {
+		gap := getGap(missingParts, i)
+		if gap == 0 && missingParts[prevIndex].OriginFile == nil &&
+			missingParts[i].OriginFile == nil &&
+			!missingParts[prevIndex].Hole && !missingParts[i].Hole &&
+			len(missingParts[prevIndex].Chunks) == 1 && len(missingParts[i].Chunks) == 1 &&
+			missingParts[prevIndex].Chunks[0].File.Name == missingParts[i].Chunks[0].File.Name {
+			missingParts[prevIndex].SourceChunk.Length += uint64(gap) + missingParts[i].SourceChunk.Length
+			missingParts[prevIndex].Chunks[0].CompressedSize += missingParts[i].Chunks[0].CompressedSize
+			missingParts[prevIndex].Chunks[0].UncompressedSize += missingParts[i].Chunks[0].UncompressedSize
+		} else {
+			newMissingParts = append(newMissingParts, missingParts[i])
+			prevIndex++
+		}
 	}
+	missingParts = newMissingParts
 
-	getGap := func(missingChunks []missingChunk, i int) int {
-		prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length
-		return int(missingChunks[i].RawChunk.Offset - prev)
+	if len(missingParts) <= target {
+		return missingParts
 	}
 
 	// this implementation doesn't account for duplicates, so it could merge
 	// more than necessary to reach the specified target.  Since target itself
 	// is a heuristic value, it doesn't matter.
-	var gaps []int
-	for i := 1; i < len(missingChunks); i++ {
-		gaps = append(gaps, getGap(missingChunks, i))
+	costs := make([]int, len(missingParts)-1)
+	for i := 1; i < len(missingParts); i++ {
+		costs[i-1] = getCost(missingParts, i)
 	}
-	sort.Ints(gaps)
+	sort.Ints(costs)
 
-	toShrink := len(missingChunks) - target
-	targetValue := gaps[toShrink-1]
+	toShrink := len(missingParts) - target
+	if toShrink >= len(costs) {
+		toShrink = len(costs) - 1
+	}
+	targetValue := costs[toShrink]
 
-	newMissingChunks := missingChunks[0:1]
-	for i := 1; i < len(missingChunks); i++ {
-		gap := getGap(missingChunks, i)
-		if gap > targetValue {
-			newMissingChunks = append(newMissingChunks, missingChunks[i])
+	newMissingParts = missingParts[0:1]
+	for i := 1; i < len(missingParts); i++ {
+		if getCost(missingParts, i) > targetValue {
+			newMissingParts = append(newMissingParts, missingParts[i])
 		} else {
-			prev := &newMissingChunks[len(newMissingChunks)-1]
-			prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length
+			gap := getGap(missingParts, i)
+			prev := &newMissingParts[len(newMissingParts)-1]
+			prev.SourceChunk.Length += uint64(gap) + missingParts[i].SourceChunk.Length
+			prev.Hole = false
+			prev.OriginFile = nil
 			if gap > 0 {
-				gapFile := missingFile{
+				gapFile := missingFileChunk{
 					Gap: int64(gap),
 				}
-				prev.Files = append(prev.Files, gapFile)
+				prev.Chunks = append(prev.Chunks, gapFile)
 			}
-			prev.Files = append(prev.Files, missingChunks[i].Files...)
+			prev.Chunks = append(prev.Chunks, missingParts[i].Chunks...)
 		}
 	}
-	return newMissingChunks
+	return newMissingParts
 }
 
-func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) error {
 	var chunksToRequest []ImageSourceChunk
-	for _, c := range missingChunks {
-		chunksToRequest = append(chunksToRequest, c.RawChunk)
+	for _, c := range missingParts {
+		if c.OriginFile == nil && !c.Hole {
+			chunksToRequest = append(chunksToRequest, *c.SourceChunk)
+		}
 	}
 
 	// There are some missing files.  Prepare a multirange request for the missing chunks.
@@ -921,20 +1098,20 @@ func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChun
 		}
 
 		if _, ok := err.(ErrBadRequest); ok {
-			requested := len(missingChunks)
+			requested := len(missingParts)
 			// If the server cannot handle at least 64 chunks in a single request, just give up.
 			if requested < 64 {
 				return err
 			}
 
 			// Merge more chunks to request
-			missingChunks = mergeMissingChunks(missingChunks, requested/2)
+			missingParts = mergeMissingChunks(missingParts, requested/2)
 			continue
 		}
 		return err
 	}
 
-	if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
+	if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingParts, options); err != nil {
 		return err
 	}
 	return nil
@@ -960,7 +1137,7 @@ func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.File
 		}
 	}
 
-	file, err := openFileUnderRoot(name, dirfd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
+	file, err := openFileUnderRoot(base, parentFd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
 	if err != nil {
 		return err
 	}
@@ -1109,7 +1286,69 @@ func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bo
 	return def
 }
 
+type findAndCopyFileOptions struct {
+	useHardLinks    bool
+	enableHostDedup bool
+	ostreeRepos     []string
+	options         *archive.TarOptions
+}
+
+func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
+	finalizeFile := func(dstFile *os.File) error {
+		if dstFile != nil {
+			defer dstFile.Close()
+			if err := setFileAttrs(dirfd, dstFile, mode, r, copyOptions.options, false); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	found, dstFile, _, err := findFileInOtherLayers(c.layersCache, r, dirfd, copyOptions.useHardLinks)
+	if err != nil {
+		return false, err
+	}
+	if found {
+		if err := finalizeFile(dstFile); err != nil {
+			return false, err
+		}
+		return true, nil
+	}
+
+	found, dstFile, _, err = findFileInOSTreeRepos(r, copyOptions.ostreeRepos, dirfd, copyOptions.useHardLinks)
+	if err != nil {
+		return false, err
+	}
+	if found {
+		if err := finalizeFile(dstFile); err != nil {
+			return false, err
+		}
+		return true, nil
+	}
+
+	if copyOptions.enableHostDedup {
+		found, dstFile, _, err = findFileOnTheHost(r, dirfd, copyOptions.useHardLinks, c.copyBuffer)
+		if err != nil {
+			return false, err
+		}
+		if found {
+			if err := finalizeFile(dstFile); err != nil {
+				return false, err
+			}
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
 func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
+	defer c.layersCache.release()
+	defer func() {
+		if c.zstdReader != nil {
+			c.zstdReader.Close()
+		}
+	}()
+
 	bigData := map[string][]byte{
 		bigDataKey: c.manifest,
 	}
@@ -1137,14 +1376,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
 	ostreeRepos := strings.Split(storeOpts.PullOptions["ostree_repos"], ":")
 
 	// Generate the manifest
-	var toc internal.TOC
-	if err := json.Unmarshal(c.manifest, &toc); err != nil {
+	toc, err := unmarshalToc(c.manifest)
+	if err != nil {
 		return output, err
 	}
 
 	whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
 
-	var missingChunks []missingChunk
+	var missingParts []missingPart
 
 	mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
@@ -1170,13 +1409,57 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
 	}
 	defer unix.Close(dirfd)
 
-	otherLayersCache := prepareOtherLayersCache(c.layersMetadata)
-
 	// hardlinks can point to missing files.  So create them after all files
 	// are retrieved
 	var hardLinks []hardLinkToCreate
 
-	missingChunksSize, totalChunksSize := int64(0), int64(0)
+	missingPartsSize, totalChunksSize := int64(0), int64(0)
+
+	copyOptions := findAndCopyFileOptions{
+		useHardLinks:    useHardLinks,
+		enableHostDedup: enableHostDedup,
+		ostreeRepos:     ostreeRepos,
+		options:         options,
+	}
+
+	type copyFileJob struct {
+		njob     int
+		index    int
+		mode     os.FileMode
+		metadata *internal.FileMetadata
+
+		found bool
+		err   error
+	}
+
+	var wg sync.WaitGroup
+
+	copyResults := make([]copyFileJob, len(mergedEntries))
+
+	copyFileJobs := make(chan copyFileJob)
+	defer func() {
+		if copyFileJobs != nil {
+			close(copyFileJobs)
+		}
+		wg.Wait()
+	}()
+
+	for i := 0; i < copyGoRoutines; i++ {
+		wg.Add(1)
+		jobs := copyFileJobs
+
+		go func() {
+			defer wg.Done()
+			for job := range jobs {
+				found, err := c.findAndCopyFile(dirfd, job.metadata, &copyOptions, job.mode)
+				job.err = err
+				job.found = found
+				copyResults[job.njob] = job
+			}
+		}()
+	}
+
+	filesToWaitFor := 0
 	for i, r := range mergedEntries {
 		if options.ForceMask != nil {
 			value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777)
@@ -1272,74 +1555,95 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
 
 		totalChunksSize += r.Size
 
-		finalizeFile := func(dstFile *os.File) error {
-			if dstFile != nil {
-				defer dstFile.Close()
-				if err := setFileAttrs(dirfd, dstFile, mode, &r, options, false); err != nil {
-					return err
-				}
+		if t == tar.TypeReg {
+			index := i
+			njob := filesToWaitFor
+			job := copyFileJob{
+				mode:     mode,
+				metadata: &mergedEntries[index],
+				index:    index,
+				njob:     njob,
 			}
-			return nil
+			copyFileJobs <- job
+			filesToWaitFor++
 		}
+	}
 
-		found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
-		if err != nil {
-			return output, err
-		}
-		if found {
-			if err := finalizeFile(dstFile); err != nil {
-				return output, err
-			}
-			continue
-		}
+	close(copyFileJobs)
+	copyFileJobs = nil
 
-		found, dstFile, _, err = findFileInOSTreeRepos(&r, ostreeRepos, dirfd, useHardLinks)
-		if err != nil {
-			return output, err
+	wg.Wait()
+
+	for _, res := range copyResults[:filesToWaitFor] {
+		if res.err != nil {
+			return output, res.err
 		}
-		if found {
-			if err := finalizeFile(dstFile); err != nil {
-				return output, err
-			}
+		// the file was already copied to its destination
+		// so nothing left to do.
+		if res.found {
 			continue
 		}
 
-		if enableHostDedup {
-			found, dstFile, _, err = findFileOnTheHost(&r, dirfd, useHardLinks)
-			if err != nil {
-				return output, err
-			}
-			if found {
-				if err := finalizeFile(dstFile); err != nil {
-					return output, err
-				}
-				continue
+		r := &mergedEntries[res.index]
+
+		missingPartsSize += r.Size
+
+		remainingSize := r.Size
+
+		// the file is missing, attempt to find individual chunks.
+		for _, chunk := range r.Chunks {
+			compressedSize := int64(chunk.EndOffset - chunk.Offset)
+			size := remainingSize
+			if chunk.ChunkSize > 0 {
+				size = chunk.ChunkSize
 			}
-		}
+			remainingSize = remainingSize - size
 
-		missingChunksSize += r.Size
-		if t == tar.TypeReg {
 			rawChunk := ImageSourceChunk{
-				Offset: uint64(r.Offset),
-				Length: uint64(r.EndOffset - r.Offset),
+				Offset: uint64(chunk.Offset),
+				Length: uint64(compressedSize),
 			}
-
-			file := missingFile{
-				File: &mergedEntries[i],
+			file := missingFileChunk{
+				File:             &mergedEntries[res.index],
+				CompressedSize:   compressedSize,
+				UncompressedSize: size,
 			}
-
-			missingChunks = append(missingChunks, missingChunk{
-				RawChunk: rawChunk,
-				Files: []missingFile{
+			mp := missingPart{
+				SourceChunk: &rawChunk,
+				Chunks: []missingFileChunk{
 					file,
 				},
-			})
+			}
+
+			switch chunk.ChunkType {
+			case internal.ChunkTypeData:
+				root, path, offset, err := c.layersCache.findChunkInOtherLayers(chunk)
+				if err != nil {
+					return output, err
+				}
+				if offset >= 0 && validateChunkChecksum(chunk, root, path, offset, c.copyBuffer) {
+					missingPartsSize -= size
+					mp.OriginFile = &originFile{
+						Root:   root,
+						Path:   path,
+						Offset: offset,
+					}
+				}
+			case internal.ChunkTypeZeros:
+				missingPartsSize -= size
+				mp.Hole = true
+				// Mark all chunks belonging to the missing part as holes
+				for i := range mp.Chunks {
+					mp.Chunks[i].Hole = true
+				}
+			}
+			missingParts = append(missingParts, mp)
 		}
 	}
 	// There are some missing files.  Prepare a multirange request for the missing chunks.
-	if len(missingChunks) > 0 {
-		missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks)
-		if err := c.retrieveMissingFiles(dest, dirfd, missingChunks, options); err != nil {
+	if len(missingParts) > 0 {
+		missingParts = mergeMissingChunks(missingParts, maxNumberMissingChunks)
+		if err := c.retrieveMissingFiles(dest, dirfd, missingParts, options); err != nil {
 			return output, err
 		}
 	}
@@ -1351,31 +1655,69 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
 	}
 
 	if totalChunksSize > 0 {
-		logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize))
+		logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
 	}
 	return output, nil
 }
 
+func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
+	// ignore the metadata files for the estargz format.
+	if fileType != fileTypeEstargz {
+		return false
+	}
+	switch e.Name {
+	// ignore the metadata files for the estargz format.
+	case estargz.PrefetchLandmark, estargz.NoPrefetchLandmark, estargz.TOCTarName:
+		return true
+	}
+	return false
+}
+
 func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, error) {
-	var mergedEntries []internal.FileMetadata
-	var prevEntry *internal.FileMetadata
-	for _, entry := range entries {
-		e := entry
+	countNextChunks := func(start int) int {
+		count := 0
+		for _, e := range entries[start:] {
+			if e.Type != TypeChunk {
+				return count
+			}
+			count++
+		}
+		return count
+	}
 
-		// ignore the metadata files for the estargz format.
-		if fileType == fileTypeEstargz && (e.Name == estargz.PrefetchLandmark || e.Name == estargz.NoPrefetchLandmark || e.Name == estargz.TOCTarName) {
+	size := 0
+	for _, entry := range entries {
+		if mustSkipFile(fileType, entry) {
 			continue
 		}
+		if entry.Type != TypeChunk {
+			size++
+		}
+	}
 
+	mergedEntries := make([]internal.FileMetadata, size)
+	m := 0
+	for i := 0; i < len(entries); i++ {
+		e := entries[i]
+		if mustSkipFile(fileType, e) {
+			continue
+		}
 		if e.Type == TypeChunk {
-			if prevEntry == nil || prevEntry.Type != TypeReg {
-				return nil, errors.New("chunk type without a regular file")
+			return nil, fmt.Errorf("chunk type without a regular file")
+		}
+
+		if e.Type == TypeReg {
+			nChunks := countNextChunks(i + 1)
+
+			e.Chunks = make([]*internal.FileMetadata, nChunks+1)
+			for j := 0; j <= nChunks; j++ {
+				e.Chunks[j] = &entries[i+j]
+				e.EndOffset = entries[i+j].EndOffset
 			}
-			prevEntry.EndOffset = e.EndOffset
-			continue
+			i += nChunks
 		}
-		mergedEntries = append(mergedEntries, e)
-		prevEntry = &e
+		mergedEntries[m] = e
+		m++
 	}
 	// stargz/estargz doesn't store EndOffset so let's calculate it here
 	lastOffset := c.tocOffset
@@ -1386,6 +1728,47 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		if mergedEntries[i].Offset != 0 {
 			lastOffset = mergedEntries[i].Offset
 		}
+
+		lastChunkOffset := mergedEntries[i].EndOffset
+		for j := len(mergedEntries[i].Chunks) - 1; j >= 0; j-- {
+			mergedEntries[i].Chunks[j].EndOffset = lastChunkOffset
+			mergedEntries[i].Chunks[j].Size = mergedEntries[i].Chunks[j].EndOffset - mergedEntries[i].Chunks[j].Offset
+			lastChunkOffset = mergedEntries[i].Chunks[j].Offset
+		}
 	}
 	return mergedEntries, nil
 }
+
+// validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
+// same digest as chunk.ChunkDigest
+func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offset int64, copyBuffer []byte) bool {
+	parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
+	if err != nil {
+		return false
+	}
+	defer unix.Close(parentDirfd)
+
+	fd, err := openFileUnderRoot(path, parentDirfd, unix.O_RDONLY, 0)
+	if err != nil {
+		return false
+	}
+	defer fd.Close()
+
+	if _, err := unix.Seek(int(fd.Fd()), offset, 0); err != nil {
+		return false
+	}
+
+	r := io.LimitReader(fd, chunk.ChunkSize)
+	digester := digest.Canonical.Digester()
+
+	if _, err := io.CopyBuffer(digester.Hash(), r, copyBuffer); err != nil {
+		return false
+	}
+
+	digest, err := digest.Parse(chunk.ChunkDigest)
+	if err != nil {
+		return false
+	}
+
+	return digester.Digest() == digest
+}
diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools.go b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
index 83e797599..0abe886eb 100644
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
@@ -82,7 +82,7 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
 	if len(uidMap) == 1 && uidMap[0].Size == 1 {
 		uid = uidMap[0].HostID
 	} else {
-		uid, err = toHost(0, uidMap)
+		uid, err = RawToHost(0, uidMap)
 		if err != nil {
 			return -1, -1, err
 		}
@@ -90,7 +90,7 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
 	if len(gidMap) == 1 && gidMap[0].Size == 1 {
 		gid = gidMap[0].HostID
 	} else {
-		gid, err = toHost(0, gidMap)
+		gid, err = RawToHost(0, gidMap)
 		if err != nil {
 			return -1, -1, err
 		}
@@ -98,10 +98,14 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
 	return uid, gid, nil
 }
 
-// toContainer takes an id mapping, and uses it to translate a
-// host ID to the remapped ID. If no map is provided, then the translation
-// assumes a 1-to-1 mapping and returns the passed in id
-func toContainer(hostID int, idMap []IDMap) (int, error) {
+// RawToContainer takes an id mapping, and uses it to translate a host ID to
+// the remapped ID. If no map is provided, then the translation assumes a
+// 1-to-1 mapping and returns the passed in id.
+//
+// If you wish to map a (uid,gid) combination you should use the corresponding
+// IDMappings methods, which ensure that you are mapping the correct ID against
+// the correct mapping.
+func RawToContainer(hostID int, idMap []IDMap) (int, error) {
 	if idMap == nil {
 		return hostID, nil
 	}
@@ -114,10 +118,14 @@ func toContainer(hostID int, idMap []IDMap) (int, error) {
 	return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID)
 }
 
-// toHost takes an id mapping and a remapped ID, and translates the
-// ID to the mapped host ID. If no map is provided, then the translation
-// assumes a 1-to-1 mapping and returns the passed in id #
-func toHost(contID int, idMap []IDMap) (int, error) {
+// RawToHost takes an id mapping and a remapped ID, and translates the ID to
+// the mapped host ID. If no map is provided, then the translation assumes a
+// 1-to-1 mapping and returns the passed in id.
+//
+// If you wish to map a (uid,gid) combination you should use the corresponding
+// IDMappings methods, which ensure that you are mapping the correct ID against
+// the correct mapping.
+func RawToHost(contID int, idMap []IDMap) (int, error) {
 	if idMap == nil {
 		return contID, nil
 	}
@@ -187,22 +195,22 @@ func (i *IDMappings) ToHost(pair IDPair) (IDPair, error) {
 	var err error
 	var target IDPair
 
-	target.UID, err = toHost(pair.UID, i.uids)
+	target.UID, err = RawToHost(pair.UID, i.uids)
 	if err != nil {
 		return target, err
 	}
 
-	target.GID, err = toHost(pair.GID, i.gids)
+	target.GID, err = RawToHost(pair.GID, i.gids)
 	return target, err
 }
 
 // ToContainer returns the container UID and GID for the host uid and gid
 func (i *IDMappings) ToContainer(pair IDPair) (int, int, error) {
-	uid, err := toContainer(pair.UID, i.uids)
+	uid, err := RawToContainer(pair.UID, i.uids)
 	if err != nil {
 		return -1, -1, err
 	}
-	gid, err := toContainer(pair.GID, i.gids)
+	gid, err := RawToContainer(pair.GID, i.gids)
 	return uid, gid, err
 }
 
diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go b/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
index e444a1bcc..6e6e3b22b 100644
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
@@ -12,10 +12,14 @@ import (
 #cgo LDFLAGS: -l subid
 #include <shadow/subid.h>
 #include <stdlib.h>
+#include <stdio.h>
 const char *Prog = "storage";
+FILE *shadow_logfd = NULL;
+
 struct subid_range get_range(struct subid_range *ranges, int i)
 {
-    return ranges[i];
+	shadow_logfd = stderr;
+	return ranges[i];
 }
 
 #if !defined(SUBID_ABI_MAJOR) || (SUBID_ABI_MAJOR < 4)
author	Aditya R <arajan@redhat.com>	2022-01-20 12:40:07 +0530
committer	Aditya R <arajan@redhat.com>	2022-01-20 12:40:11 +0530
commit	2c492be00a13bfbc389d2b1b97c6bf91520e280e (patch)
tree	a0603d66b29dcc9ab91354ef583ba5e349f8409f /vendor/github.com/containers/storage/pkg
parent	f46478c1e9af601759e341de76d4c655b4a66068 (diff)