summaryrefslogtreecommitdiff
path: root/vendor/github.com/containers/storage/pkg
diff options
context:
space:
mode:
authorAditya R <arajan@redhat.com>2022-01-20 12:40:07 +0530
committerAditya R <arajan@redhat.com>2022-01-20 12:40:11 +0530
commit2c492be00a13bfbc389d2b1b97c6bf91520e280e (patch)
treea0603d66b29dcc9ab91354ef583ba5e349f8409f /vendor/github.com/containers/storage/pkg
parentf46478c1e9af601759e341de76d4c655b4a66068 (diff)
vendor: bump c/common and other vendors
This commit bumps majorly c/common so netavark features could be synced with podman. But there are some other vendor bumps as well [NO NEW TESTS NEEDED] [NO TESTS NEEDED] Signed-off-by: Aditya R <arajan@redhat.com>
Diffstat (limited to 'vendor/github.com/containers/storage/pkg')
-rw-r--r--vendor/github.com/containers/storage/pkg/chunked/cache_linux.go630
-rw-r--r--vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go310
-rw-r--r--vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go81
-rw-r--r--vendor/github.com/containers/storage/pkg/chunked/internal/compression.go32
-rw-r--r--vendor/github.com/containers/storage/pkg/chunked/storage_linux.go961
-rw-r--r--vendor/github.com/containers/storage/pkg/idtools/idtools.go36
-rw-r--r--vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go6
7 files changed, 1704 insertions, 352 deletions
diff --git a/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go b/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go
new file mode 100644
index 000000000..a931fb5d1
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/chunked/cache_linux.go
@@ -0,0 +1,630 @@
+package chunked
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+ "unsafe"
+
+ storage "github.com/containers/storage"
+ "github.com/containers/storage/pkg/chunked/internal"
+ "github.com/containers/storage/pkg/ioutils"
+ jsoniter "github.com/json-iterator/go"
+ digest "github.com/opencontainers/go-digest"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+const (
+ cacheKey = "chunked-manifest-cache"
+ cacheVersion = 1
+)
+
+type metadata struct {
+ tagLen int
+ digestLen int
+ tags []byte
+ vdata []byte
+}
+
+type layer struct {
+ id string
+ metadata *metadata
+ target string
+}
+
+type layersCache struct {
+ layers []layer
+ refs int
+ store storage.Store
+ mutex sync.RWMutex
+ created time.Time
+}
+
+var cacheMutex sync.Mutex
+var cache *layersCache
+
+func (c *layersCache) release() {
+ cacheMutex.Lock()
+ defer cacheMutex.Unlock()
+
+ c.refs--
+ if c.refs == 0 {
+ cache = nil
+ }
+}
+
+func getLayersCacheRef(store storage.Store) *layersCache {
+ cacheMutex.Lock()
+ defer cacheMutex.Unlock()
+ if cache != nil && cache.store == store && time.Since(cache.created).Minutes() < 10 {
+ cache.refs++
+ return cache
+ }
+ cache := &layersCache{
+ store: store,
+ refs: 1,
+ created: time.Now(),
+ }
+ return cache
+}
+
+func getLayersCache(store storage.Store) (*layersCache, error) {
+ c := getLayersCacheRef(store)
+
+ if err := c.load(); err != nil {
+ c.release()
+ return nil, err
+ }
+ return c, nil
+}
+
+func (c *layersCache) load() error {
+ c.mutex.Lock()
+ defer c.mutex.Unlock()
+
+ allLayers, err := c.store.Layers()
+ if err != nil {
+ return err
+ }
+ existingLayers := make(map[string]string)
+ for _, r := range c.layers {
+ existingLayers[r.id] = r.target
+ }
+
+ currentLayers := make(map[string]string)
+ for _, r := range allLayers {
+ currentLayers[r.ID] = r.ID
+ if _, found := existingLayers[r.ID]; found {
+ continue
+ }
+
+ bigData, err := c.store.LayerBigData(r.ID, cacheKey)
+ if err != nil {
+ if errors.Cause(err) == os.ErrNotExist {
+ continue
+ }
+ return err
+ }
+ defer bigData.Close()
+
+ metadata, err := readMetadataFromCache(bigData)
+ if err != nil {
+ logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
+ }
+
+ if metadata != nil {
+ c.addLayer(r.ID, metadata)
+ continue
+ }
+
+ manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
+ if err != nil {
+ continue
+ }
+ defer manifestReader.Close()
+ manifest, err := ioutil.ReadAll(manifestReader)
+ if err != nil {
+ return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
+ }
+
+ metadata, err = writeCache(manifest, r.ID, c.store)
+ if err == nil {
+ c.addLayer(r.ID, metadata)
+ }
+ }
+
+ var newLayers []layer
+ for _, l := range c.layers {
+ if _, found := currentLayers[l.id]; found {
+ newLayers = append(newLayers, l)
+ }
+ }
+ c.layers = newLayers
+
+ return nil
+}
+
+// calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
+// is usable for deduplication with hardlinks.
+// To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
+func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
+ digester := digest.Canonical.Digester()
+
+ modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
+ hash := digester.Hash()
+
+ if _, err := hash.Write([]byte(f.Digest)); err != nil {
+ return "", err
+ }
+
+ if _, err := hash.Write([]byte(modeString)); err != nil {
+ return "", err
+ }
+
+ if len(f.Xattrs) > 0 {
+ keys := make([]string, 0, len(f.Xattrs))
+ for k := range f.Xattrs {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ for _, k := range keys {
+ if _, err := hash.Write([]byte(k)); err != nil {
+ return "", err
+ }
+ if _, err := hash.Write([]byte(f.Xattrs[k])); err != nil {
+ return "", err
+ }
+ }
+ }
+ return string(digester.Digest()), nil
+}
+
+// generateFileLocation generates a file location in the form $OFFSET@$PATH
+func generateFileLocation(path string, offset uint64) []byte {
+ return []byte(fmt.Sprintf("%d@%s", offset, path))
+}
+
+// generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
+// the [OFFSET; LEN] points to the variable length data where the file locations
+// are stored. $DIGEST has length digestLen stored in the metadata file header.
+func generateTag(digest string, offset, len uint64) string {
+ return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
+}
+
+type setBigData interface {
+ // SetLayerBigData stores a (possibly large) chunk of named data
+ SetLayerBigData(id, key string, data io.Reader) error
+}
+
+// writeCache write a cache for the layer ID.
+// It generates a sorted list of digests with their offset to the path location and offset.
+// The same cache is used to lookup files, chunks and candidates for deduplication with hard links.
+// There are 3 kind of digests stored:
+// - digest(file.payload))
+// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
+// - digest(i) for each i in chunks(file payload)
+func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
+ var vdata bytes.Buffer
+ tagLen := 0
+ digestLen := 0
+ var tagsBuffer bytes.Buffer
+
+ toc, err := prepareMetadata(manifest)
+ if err != nil {
+ return nil, err
+ }
+
+ var tags []string
+ for _, k := range toc {
+ if k.Digest != "" {
+ location := generateFileLocation(k.Name, 0)
+
+ off := uint64(vdata.Len())
+ l := uint64(len(location))
+
+ d := generateTag(k.Digest, off, l)
+ if tagLen == 0 {
+ tagLen = len(d)
+ }
+ if tagLen != len(d) {
+ return nil, errors.New("digest with different length found")
+ }
+ tags = append(tags, d)
+
+ fp, err := calculateHardLinkFingerprint(k)
+ if err != nil {
+ return nil, err
+ }
+ d = generateTag(fp, off, l)
+ if tagLen != len(d) {
+ return nil, errors.New("digest with different length found")
+ }
+ tags = append(tags, d)
+
+ if _, err := vdata.Write(location); err != nil {
+ return nil, err
+ }
+
+ digestLen = len(k.Digest)
+ }
+ if k.ChunkDigest != "" {
+ location := generateFileLocation(k.Name, uint64(k.ChunkOffset))
+ off := uint64(vdata.Len())
+ l := uint64(len(location))
+ d := generateTag(k.ChunkDigest, off, l)
+ if tagLen == 0 {
+ tagLen = len(d)
+ }
+ if tagLen != len(d) {
+ return nil, errors.New("digest with different length found")
+ }
+ tags = append(tags, d)
+
+ if _, err := vdata.Write(location); err != nil {
+ return nil, err
+ }
+ digestLen = len(k.ChunkDigest)
+ }
+ }
+
+ sort.Strings(tags)
+
+ for _, t := range tags {
+ if _, err := tagsBuffer.Write([]byte(t)); err != nil {
+ return nil, err
+ }
+ }
+
+ pipeReader, pipeWriter := io.Pipe()
+ errChan := make(chan error, 1)
+ go func() {
+ defer pipeWriter.Close()
+ defer close(errChan)
+
+ // version
+ if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil {
+ errChan <- err
+ return
+ }
+
+ // len of a tag
+ if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil {
+ errChan <- err
+ return
+ }
+
+ // len of a digest
+ if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil {
+ errChan <- err
+ return
+ }
+
+ // tags length
+ if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
+ errChan <- err
+ return
+ }
+
+ // vdata length
+ if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil {
+ errChan <- err
+ return
+ }
+
+ // tags
+ if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil {
+ errChan <- err
+ return
+ }
+
+ // variable length data
+ if _, err := pipeWriter.Write(vdata.Bytes()); err != nil {
+ errChan <- err
+ return
+ }
+
+ errChan <- nil
+ }()
+ defer pipeReader.Close()
+
+ counter := ioutils.NewWriteCounter(ioutil.Discard)
+
+ r := io.TeeReader(pipeReader, counter)
+
+ if err := dest.SetLayerBigData(id, cacheKey, r); err != nil {
+ return nil, err
+ }
+
+ if err := <-errChan; err != nil {
+ return nil, err
+ }
+
+ logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
+
+ return &metadata{
+ digestLen: digestLen,
+ tagLen: tagLen,
+ tags: tagsBuffer.Bytes(),
+ vdata: vdata.Bytes(),
+ }, nil
+}
+
+func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
+ var version, tagLen, digestLen, tagsLen, vdataLen uint64
+ if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
+ return nil, err
+ }
+ if version != cacheVersion {
+ return nil, nil
+ }
+ if err := binary.Read(bigData, binary.LittleEndian, &tagLen); err != nil {
+ return nil, err
+ }
+ if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil {
+ return nil, err
+ }
+ if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil {
+ return nil, err
+ }
+ if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil {
+ return nil, err
+ }
+
+ tags := make([]byte, tagsLen)
+ if _, err := bigData.Read(tags); err != nil {
+ return nil, err
+ }
+
+ vdata := make([]byte, vdataLen)
+ if _, err := bigData.Read(vdata); err != nil {
+ return nil, err
+ }
+
+ return &metadata{
+ tagLen: int(tagLen),
+ digestLen: int(digestLen),
+ tags: tags,
+ vdata: vdata,
+ }, nil
+}
+
+func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
+ toc, err := unmarshalToc(manifest)
+ if err != nil {
+ // ignore errors here. They might be caused by a different manifest format.
+ return nil, nil
+ }
+
+ var r []*internal.FileMetadata
+ chunkSeen := make(map[string]bool)
+ for i := range toc.Entries {
+ d := toc.Entries[i].Digest
+ if d != "" {
+ r = append(r, &toc.Entries[i])
+ continue
+ }
+
+ // chunks do not use hard link dedup so keeping just one candidate is enough
+ cd := toc.Entries[i].ChunkDigest
+ if cd != "" && !chunkSeen[cd] {
+ r = append(r, &toc.Entries[i])
+ chunkSeen[cd] = true
+ }
+ }
+ return r, nil
+}
+
+func (c *layersCache) addLayer(id string, metadata *metadata) error {
+ target, err := c.store.DifferTarget(id)
+ if err != nil {
+ return fmt.Errorf("get checkout directory layer %q: %w", id, err)
+ }
+
+ l := layer{
+ id: id,
+ metadata: metadata,
+ target: target,
+ }
+ c.layers = append(c.layers, l)
+ return nil
+}
+
+func byteSliceAsString(b []byte) string {
+ return *(*string)(unsafe.Pointer(&b))
+}
+
+func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
+ if len(digest) != metadata.digestLen {
+ return "", 0, 0
+ }
+
+ nElements := len(metadata.tags) / metadata.tagLen
+
+ i := sort.Search(nElements, func(i int) bool {
+ d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen])
+ return strings.Compare(d, digest) >= 0
+ })
+ if i < nElements {
+ d := string(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+len(digest)])
+ if digest == d {
+ startOff := i*metadata.tagLen + metadata.digestLen
+ parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
+ off, _ := strconv.ParseInt(parts[0], 10, 64)
+ len, _ := strconv.ParseInt(parts[1], 10, 64)
+ return digest, uint64(off), uint64(len)
+ }
+ }
+ return "", 0, 0
+}
+
+func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) {
+ if digest == "" {
+ return "", "", -1, nil
+ }
+
+ c.mutex.RLock()
+ defer c.mutex.RUnlock()
+
+ for _, layer := range c.layers {
+ digest, off, len := findTag(digest, layer.metadata)
+ if digest != "" {
+ position := string(layer.metadata.vdata[off : off+len])
+ parts := strings.SplitN(position, "@", 2)
+ offFile, _ := strconv.ParseInt(parts[0], 10, 64)
+ return layer.target, parts[1], offFile, nil
+ }
+ }
+
+ return "", "", -1, nil
+}
+
+// findFileInOtherLayers finds the specified file in other layers.
+// file is the file to look for.
+func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) {
+ digest := file.Digest
+ if useHardLinks {
+ var err error
+ digest, err = calculateHardLinkFingerprint(file)
+ if err != nil {
+ return "", "", err
+ }
+ }
+ target, name, off, err := c.findDigestInternal(digest)
+ if off == 0 {
+ return target, name, err
+ }
+ return "", "", nil
+}
+
+func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (string, string, int64, error) {
+ return c.findDigestInternal(chunk.ChunkDigest)
+}
+
+func unmarshalToc(manifest []byte) (*internal.TOC, error) {
+ var buf bytes.Buffer
+ count := 0
+ var toc internal.TOC
+
+ iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
+ for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+ if field != "entries" {
+ iter.Skip()
+ continue
+ }
+ for iter.ReadArray() {
+ for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+ switch field {
+ case "type", "name", "linkName", "digest", "chunkDigest", "chunkType":
+ count += len(iter.ReadStringAsSlice())
+ case "xattrs":
+ for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
+ count += len(iter.ReadStringAsSlice())
+ }
+ default:
+ iter.Skip()
+ }
+ }
+ }
+ break
+ }
+
+ buf.Grow(count)
+
+ getString := func(b []byte) string {
+ from := buf.Len()
+ buf.Write(b)
+ to := buf.Len()
+ return byteSliceAsString(buf.Bytes()[from:to])
+ }
+
+ iter = jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
+ for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+ if field == "version" {
+ toc.Version = iter.ReadInt()
+ continue
+ }
+ if field != "entries" {
+ iter.Skip()
+ continue
+ }
+ for iter.ReadArray() {
+ var m internal.FileMetadata
+ for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
+ switch field {
+ case "type":
+ m.Type = getString(iter.ReadStringAsSlice())
+ case "name":
+ m.Name = getString(iter.ReadStringAsSlice())
+ case "linkName":
+ m.Linkname = getString(iter.ReadStringAsSlice())
+ case "mode":
+ m.Mode = iter.ReadInt64()
+ case "size":
+ m.Size = iter.ReadInt64()
+ case "UID":
+ m.UID = iter.ReadInt()
+ case "GID":
+ m.GID = iter.ReadInt()
+ case "ModTime":
+ time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+ if err != nil {
+ return nil, err
+ }
+ m.ModTime = &time
+ case "accesstime":
+ time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+ if err != nil {
+ return nil, err
+ }
+ m.AccessTime = &time
+ case "changetime":
+ time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+ if err != nil {
+ return nil, err
+ }
+ m.ChangeTime = &time
+ case "devMajor":
+ m.Devmajor = iter.ReadInt64()
+ case "devMinor":
+ m.Devminor = iter.ReadInt64()
+ case "digest":
+ m.Digest = getString(iter.ReadStringAsSlice())
+ case "offset":
+ m.Offset = iter.ReadInt64()
+ case "endOffset":
+ m.EndOffset = iter.ReadInt64()
+ case "chunkSize":
+ m.ChunkSize = iter.ReadInt64()
+ case "chunkOffset":
+ m.ChunkOffset = iter.ReadInt64()
+ case "chunkDigest":
+ m.ChunkDigest = getString(iter.ReadStringAsSlice())
+ case "chunkType":
+ m.ChunkType = getString(iter.ReadStringAsSlice())
+ case "xattrs":
+ m.Xattrs = make(map[string]string)
+ for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
+ value := iter.ReadStringAsSlice()
+ m.Xattrs[key] = getString(value)
+ }
+ default:
+ iter.Skip()
+ }
+ }
+ toc.Entries = append(toc.Entries, m)
+ }
+ break
+ }
+ toc.StringsBuf = buf
+ return &toc, nil
+}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
index 092cf584a..aeb7cfd4f 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/compressor.go
@@ -5,6 +5,7 @@ package compressor
// larger software like the graph drivers.
import (
+ "bufio"
"encoding/base64"
"io"
"io/ioutil"
@@ -15,6 +16,189 @@ import (
"github.com/vbatts/tar-split/archive/tar"
)
+const RollsumBits = 16
+const holesThreshold = int64(1 << 10)
+
+type holesFinder struct {
+ reader *bufio.Reader
+ fileOff int64
+ zeros int64
+ from int64
+ threshold int64
+
+ state int
+}
+
+const (
+ holesFinderStateRead = iota
+ holesFinderStateAccumulate
+ holesFinderStateFound
+ holesFinderStateEOF
+)
+
+// ReadByte reads a single byte from the underlying reader.
+// If a single byte is read, the return value is (0, RAW-BYTE-VALUE, nil).
+// If there are at least f.THRESHOLD consecutive zeros, then the
+// return value is (N_CONSECUTIVE_ZEROS, '\x00').
+func (f *holesFinder) ReadByte() (int64, byte, error) {
+ for {
+ switch f.state {
+ // reading the file stream
+ case holesFinderStateRead:
+ if f.zeros > 0 {
+ f.zeros--
+ return 0, 0, nil
+ }
+ b, err := f.reader.ReadByte()
+ if err != nil {
+ return 0, b, err
+ }
+
+ if b != 0 {
+ return 0, b, err
+ }
+
+ f.zeros = 1
+ if f.zeros == f.threshold {
+ f.state = holesFinderStateFound
+ } else {
+ f.state = holesFinderStateAccumulate
+ }
+ // accumulating zeros, but still didn't reach the threshold
+ case holesFinderStateAccumulate:
+ b, err := f.reader.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ f.state = holesFinderStateEOF
+ continue
+ }
+ return 0, b, err
+ }
+
+ if b == 0 {
+ f.zeros++
+ if f.zeros == f.threshold {
+ f.state = holesFinderStateFound
+ }
+ } else {
+ if f.reader.UnreadByte(); err != nil {
+ return 0, 0, err
+ }
+ f.state = holesFinderStateRead
+ }
+ // found a hole. Number of zeros >= threshold
+ case holesFinderStateFound:
+ b, err := f.reader.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ f.state = holesFinderStateEOF
+ }
+ holeLen := f.zeros
+ f.zeros = 0
+ return holeLen, 0, nil
+ }
+ if b != 0 {
+ if f.reader.UnreadByte(); err != nil {
+ return 0, 0, err
+ }
+ f.state = holesFinderStateRead
+
+ holeLen := f.zeros
+ f.zeros = 0
+ return holeLen, 0, nil
+ }
+ f.zeros++
+ // reached EOF. Flush pending zeros if any.
+ case holesFinderStateEOF:
+ if f.zeros > 0 {
+ f.zeros--
+ return 0, 0, nil
+ }
+ return 0, 0, io.EOF
+ }
+ }
+}
+
+type rollingChecksumReader struct {
+ reader *holesFinder
+ closed bool
+ rollsum *RollSum
+ pendingHole int64
+
+ // WrittenOut is the total number of bytes read from
+ // the stream.
+ WrittenOut int64
+
+ // IsLastChunkZeros tells whether the last generated
+ // chunk is a hole (made of consecutive zeros). If it
+ // is false, then the last chunk is a data chunk
+ // generated by the rolling checksum.
+ IsLastChunkZeros bool
+}
+
+func (rc *rollingChecksumReader) Read(b []byte) (bool, int, error) {
+ rc.IsLastChunkZeros = false
+
+ if rc.pendingHole > 0 {
+ toCopy := int64(len(b))
+ if rc.pendingHole < toCopy {
+ toCopy = rc.pendingHole
+ }
+ rc.pendingHole -= toCopy
+ for i := int64(0); i < toCopy; i++ {
+ b[i] = 0
+ }
+
+ rc.WrittenOut += toCopy
+
+ rc.IsLastChunkZeros = true
+
+ // if there are no other zeros left, terminate the chunk
+ return rc.pendingHole == 0, int(toCopy), nil
+ }
+
+ if rc.closed {
+ return false, 0, io.EOF
+ }
+
+ for i := 0; i < len(b); i++ {
+ holeLen, n, err := rc.reader.ReadByte()
+ if err != nil {
+ if err == io.EOF {
+ rc.closed = true
+ if i == 0 {
+ return false, 0, err
+ }
+ return false, i, nil
+ }
+ // Report any other error type
+ return false, -1, err
+ }
+ if holeLen > 0 {
+ for j := int64(0); j < holeLen; j++ {
+ rc.rollsum.Roll(0)
+ }
+ rc.pendingHole = holeLen
+ return true, i, nil
+ }
+ b[i] = n
+ rc.WrittenOut++
+ rc.rollsum.Roll(n)
+ if rc.rollsum.OnSplitWithBits(RollsumBits) {
+ return true, i + 1, nil
+ }
+ }
+ return false, len(b), nil
+}
+
+type chunk struct {
+ ChunkOffset int64
+ Offset int64
+ Checksum string
+ ChunkSize int64
+ ChunkType string
+}
+
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
@@ -64,40 +248,78 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
- payloadDigester := digest.Canonical.Digester()
- payloadChecksum := payloadDigester.Hash()
- payloadDest := io.MultiWriter(payloadChecksum, zstdWriter)
+ payloadDigester := digest.Canonical.Digester()
+ chunkDigester := digest.Canonical.Digester()
// Now handle the payload, if any
- var startOffset, endOffset int64
+ startOffset := int64(0)
+ lastOffset := int64(0)
+ lastChunkOffset := int64(0)
+
checksum := ""
+
+ chunks := []chunk{}
+
+ hf := &holesFinder{
+ threshold: holesThreshold,
+ reader: bufio.NewReader(tr),
+ }
+
+ rcReader := &rollingChecksumReader{
+ reader: hf,
+ rollsum: NewRollSum(),
+ }
+
+ payloadDest := io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
for {
- read, errRead := tr.Read(buf)
+ mustSplit, read, errRead := rcReader.Read(buf)
if errRead != nil && errRead != io.EOF {
return err
}
-
- // restart the compression only if there is
- // a payload.
+ // restart the compression only if there is a payload.
if read > 0 {
if startOffset == 0 {
startOffset, err = restartCompression()
if err != nil {
return err
}
+ lastOffset = startOffset
+ }
+
+ if _, err := payloadDest.Write(buf[:read]); err != nil {
+ return err
}
- _, err := payloadDest.Write(buf[:read])
+ }
+ if (mustSplit || errRead == io.EOF) && startOffset > 0 {
+ off, err := restartCompression()
if err != nil {
return err
}
+
+ chunkSize := rcReader.WrittenOut - lastChunkOffset
+ if chunkSize > 0 {
+ chunkType := internal.ChunkTypeData
+ if rcReader.IsLastChunkZeros {
+ chunkType = internal.ChunkTypeZeros
+ }
+
+ chunks = append(chunks, chunk{
+ ChunkOffset: lastChunkOffset,
+ Offset: lastOffset,
+ Checksum: chunkDigester.Digest().String(),
+ ChunkSize: chunkSize,
+ ChunkType: chunkType,
+ })
+ }
+
+ lastOffset = off
+ lastChunkOffset = rcReader.WrittenOut
+ chunkDigester = digest.Canonical.Digester()
+ payloadDest = io.MultiWriter(payloadDigester.Hash(), chunkDigester.Hash(), zstdWriter)
}
if errRead == io.EOF {
if startOffset > 0 {
- endOffset, err = restartCompression()
- if err != nil {
- return err
- }
checksum = payloadDigester.Digest().String()
}
break
@@ -112,30 +334,42 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
- m := internal.FileMetadata{
- Type: typ,
- Name: hdr.Name,
- Linkname: hdr.Linkname,
- Mode: hdr.Mode,
- Size: hdr.Size,
- UID: hdr.Uid,
- GID: hdr.Gid,
- ModTime: hdr.ModTime,
- AccessTime: hdr.AccessTime,
- ChangeTime: hdr.ChangeTime,
- Devmajor: hdr.Devmajor,
- Devminor: hdr.Devminor,
- Xattrs: xattrs,
- Digest: checksum,
- Offset: startOffset,
- EndOffset: endOffset,
-
- // ChunkSize is 0 for the last chunk
- ChunkSize: 0,
- ChunkOffset: 0,
- ChunkDigest: checksum,
- }
- metadata = append(metadata, m)
+ entries := []internal.FileMetadata{
+ {
+ Type: typ,
+ Name: hdr.Name,
+ Linkname: hdr.Linkname,
+ Mode: hdr.Mode,
+ Size: hdr.Size,
+ UID: hdr.Uid,
+ GID: hdr.Gid,
+ ModTime: &hdr.ModTime,
+ AccessTime: &hdr.AccessTime,
+ ChangeTime: &hdr.ChangeTime,
+ Devmajor: hdr.Devmajor,
+ Devminor: hdr.Devminor,
+ Xattrs: xattrs,
+ Digest: checksum,
+ Offset: startOffset,
+ EndOffset: lastOffset,
+ },
+ }
+ for i := 1; i < len(chunks); i++ {
+ entries = append(entries, internal.FileMetadata{
+ Type: internal.TypeChunk,
+ Name: hdr.Name,
+ ChunkOffset: chunks[i].ChunkOffset,
+ })
+ }
+ if len(chunks) > 1 {
+ for i := range chunks {
+ entries[i].ChunkSize = chunks[i].ChunkSize
+ entries[i].Offset = chunks[i].Offset
+ entries[i].ChunkDigest = chunks[i].Checksum
+ entries[i].ChunkType = chunks[i].ChunkType
+ }
+ }
+ metadata = append(metadata, entries...)
}
rawBytes := tr.RawBytes()
@@ -212,7 +446,7 @@ func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level
// ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) {
if level == nil {
- l := 3
+ l := 10
level = &l
}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go b/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go
new file mode 100644
index 000000000..f4dfad822
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/chunked/compressor/rollsum.go
@@ -0,0 +1,81 @@
+/*
+Copyright 2011 The Perkeep Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package rollsum implements rolling checksums similar to apenwarr's bup, which
+// is similar to librsync.
+//
+// The bup project is at https://github.com/apenwarr/bup and its splitting in
+// particular is at https://github.com/apenwarr/bup/blob/master/lib/bup/bupsplit.c
+package compressor
+
+import (
+ "math/bits"
+)
+
+const windowSize = 64 // Roll assumes windowSize is a power of 2
+const charOffset = 31
+
+const blobBits = 13
+const blobSize = 1 << blobBits // 8k
+
+type RollSum struct {
+ s1, s2 uint32
+ window [windowSize]uint8
+ wofs int
+}
+
+func NewRollSum() *RollSum {
+ return &RollSum{
+ s1: windowSize * charOffset,
+ s2: windowSize * (windowSize - 1) * charOffset,
+ }
+}
+
+func (rs *RollSum) add(drop, add uint32) {
+ s1 := rs.s1 + add - drop
+ rs.s1 = s1
+ rs.s2 += s1 - uint32(windowSize)*(drop+charOffset)
+}
+
+// Roll adds ch to the rolling sum.
+func (rs *RollSum) Roll(ch byte) {
+ wp := &rs.window[rs.wofs]
+ rs.add(uint32(*wp), uint32(ch))
+ *wp = ch
+ rs.wofs = (rs.wofs + 1) & (windowSize - 1)
+}
+
+// OnSplit reports whether at least 13 consecutive trailing bits of
+// the current checksum are set the same way.
+func (rs *RollSum) OnSplit() bool {
+ return (rs.s2 & (blobSize - 1)) == ((^0) & (blobSize - 1))
+}
+
+// OnSplitWithBits reports whether at least n consecutive trailing bits
+// of the current checksum are set the same way.
+func (rs *RollSum) OnSplitWithBits(n uint32) bool {
+ mask := (uint32(1) << n) - 1
+ return rs.s2&mask == (^uint32(0))&mask
+}
+
+func (rs *RollSum) Bits() int {
+ rsum := rs.Digest() >> (blobBits + 1)
+ return blobBits + bits.TrailingZeros32(^rsum)
+}
+
+func (rs *RollSum) Digest() uint32 {
+ return (rs.s1 << 16) | (rs.s2 & 0xffff)
+}
diff --git a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
index c91c43d85..3bb5286d9 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/internal/compression.go
@@ -8,11 +8,11 @@ import (
"archive/tar"
"bytes"
"encoding/binary"
- "encoding/json"
"fmt"
"io"
"time"
+ jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
)
@@ -20,6 +20,9 @@ import (
type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
+
+ // internal: used by unmarshalToc
+ StringsBuf bytes.Buffer `json:"-"`
}
type FileMetadata struct {
@@ -27,26 +30,34 @@ type FileMetadata struct {
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
Mode int64 `json:"mode,omitempty"`
- Size int64 `json:"size"`
- UID int `json:"uid"`
- GID int `json:"gid"`
- ModTime time.Time `json:"modtime"`
- AccessTime time.Time `json:"accesstime"`
- ChangeTime time.Time `json:"changetime"`
- Devmajor int64 `json:"devMajor"`
- Devminor int64 `json:"devMinor"`
+ Size int64 `json:"size,omitempty"`
+ UID int `json:"uid,omitempty"`
+ GID int `json:"gid,omitempty"`
+ ModTime *time.Time `json:"modtime,omitempty"`
+ AccessTime *time.Time `json:"accesstime,omitempty"`
+ ChangeTime *time.Time `json:"changetime,omitempty"`
+ Devmajor int64 `json:"devMajor,omitempty"`
+ Devminor int64 `json:"devMinor,omitempty"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
- // Currently chunking is not supported.
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
+ ChunkType string `json:"chunkType,omitempty"`
+
+ // internal: computed by mergeTOCEntries.
+ Chunks []*FileMetadata `json:"-"`
}
const (
+ ChunkTypeData = ""
+ ChunkTypeZeros = "zeros"
+)
+
+const (
TypeReg = "reg"
TypeChunk = "chunk"
TypeLink = "hardlink"
@@ -123,6 +134,7 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
Entries: metadata,
}
+ var json = jsoniter.ConfigCompatibleWithStandardLibrary
// Generate the manifest
manifest, err := json.Marshal(toc)
if err != nil {
diff --git a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
index 52d21d689..92b15c2bf 100644
--- a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
+++ b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go
@@ -4,8 +4,8 @@ import (
archivetar "archive/tar"
"context"
"encoding/base64"
- "encoding/json"
"fmt"
+ "hash"
"io"
"io/ioutil"
"os"
@@ -13,6 +13,7 @@ import (
"reflect"
"sort"
"strings"
+ "sync"
"sync/atomic"
"syscall"
"time"
@@ -43,28 +44,35 @@ const (
bigDataKey = "zstd-chunked-manifest"
fileTypeZstdChunked = iota
- fileTypeEstargz = iota
+ fileTypeEstargz
+ fileTypeNoCompression
+ fileTypeHole
+
+ copyGoRoutines = 32
)
type compressedFileType int
type chunkedDiffer struct {
- stream ImageSourceSeekable
- manifest []byte
- layersMetadata map[string][]internal.FileMetadata
- layersTarget map[string]string
- tocOffset int64
- fileType compressedFileType
+ stream ImageSourceSeekable
+ manifest []byte
+ layersCache *layersCache
+ tocOffset int64
+ fileType compressedFileType
+
+ copyBuffer []byte
gzipReader *pgzip.Reader
+ zstdReader *zstd.Decoder
+ rawReader io.Reader
}
var xattrsToIgnore = map[string]interface{}{
"security.selinux": true,
}
-func timeToTimespec(time time.Time) (ts unix.Timespec) {
- if time.IsZero() {
+func timeToTimespec(time *time.Time) (ts unix.Timespec) {
+ if time == nil || time.IsZero() {
// Return UTIME_OMIT special value
ts.Sec = 0
ts.Nsec = ((1 << 30) - 2)
@@ -128,54 +136,6 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
return dstFile, st.Size(), nil
}
-func prepareOtherLayersCache(layersMetadata map[string][]internal.FileMetadata) map[string]map[string][]*internal.FileMetadata {
- maps := make(map[string]map[string][]*internal.FileMetadata)
-
- for layerID, v := range layersMetadata {
- r := make(map[string][]*internal.FileMetadata)
- for i := range v {
- if v[i].Digest != "" {
- r[v[i].Digest] = append(r[v[i].Digest], &v[i])
- }
- }
- maps[layerID] = r
- }
- return maps
-}
-
-func getLayersCache(store storage.Store) (map[string][]internal.FileMetadata, map[string]string, error) {
- allLayers, err := store.Layers()
- if err != nil {
- return nil, nil, err
- }
-
- layersMetadata := make(map[string][]internal.FileMetadata)
- layersTarget := make(map[string]string)
- for _, r := range allLayers {
- manifestReader, err := store.LayerBigData(r.ID, bigDataKey)
- if err != nil {
- continue
- }
- defer manifestReader.Close()
- manifest, err := ioutil.ReadAll(manifestReader)
- if err != nil {
- return nil, nil, fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
- }
- var toc internal.TOC
- if err := json.Unmarshal(manifest, &toc); err != nil {
- continue
- }
- layersMetadata[r.ID] = toc.Entries
- target, err := store.DifferTarget(r.ID)
- if err != nil {
- return nil, nil, fmt.Errorf("get checkout directory layer %q: %w", r.ID, err)
- }
- layersTarget[r.ID] = target
- }
-
- return layersMetadata, layersTarget, nil
-}
-
// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
if _, ok := annotations[internal.ManifestChecksumKey]; ok {
@@ -192,18 +152,18 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
- layersMetadata, layersTarget, err := getLayersCache(store)
+ layersCache, err := getLayersCache(store)
if err != nil {
return nil, err
}
return &chunkedDiffer{
- stream: iss,
- manifest: manifest,
- layersMetadata: layersMetadata,
- layersTarget: layersTarget,
- tocOffset: tocOffset,
- fileType: fileTypeZstdChunked,
+ copyBuffer: makeCopyBuffer(),
+ stream: iss,
+ manifest: manifest,
+ layersCache: layersCache,
+ tocOffset: tocOffset,
+ fileType: fileTypeZstdChunked,
}, nil
}
@@ -212,37 +172,41 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
- layersMetadata, layersTarget, err := getLayersCache(store)
+ layersCache, err := getLayersCache(store)
if err != nil {
return nil, err
}
return &chunkedDiffer{
- stream: iss,
- manifest: manifest,
- layersMetadata: layersMetadata,
- layersTarget: layersTarget,
- tocOffset: tocOffset,
- fileType: fileTypeEstargz,
+ copyBuffer: makeCopyBuffer(),
+ stream: iss,
+ manifest: manifest,
+ layersCache: layersCache,
+ tocOffset: tocOffset,
+ fileType: fileTypeEstargz,
}, nil
}
+func makeCopyBuffer() []byte {
+ return make([]byte, 2<<20)
+}
+
// copyFileFromOtherLayer copies a file from another layer
// file is the file to look for.
// source is the path to the source layer checkout.
-// otherFile contains the metadata for the file.
+// name is the path to the file to copy in source.
// dirfd is an open file descriptor to the destination root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
-func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
if err != nil {
- return false, nil, 0, fmt.Errorf("open source file %q: %w", source, err)
+ return false, nil, 0, fmt.Errorf("open source file: %w", err)
}
defer unix.Close(srcDirfd)
- srcFile, err := openFileUnderRoot(otherFile.Name, srcDirfd, unix.O_RDONLY, 0)
+ srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
if err != nil {
- return false, nil, 0, fmt.Errorf("open source file %q under target rootfs: %w", otherFile.Name, err)
+ return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err)
}
defer srcFile.Close()
@@ -308,45 +272,9 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
return canDedupMetadataWithHardLink(file, &otherFile)
}
-// findFileInOtherLayers finds the specified file in other layers.
-// file is the file to look for.
-// dirfd is an open file descriptor to the checkout root directory.
-// layersMetadata contains the metadata for each layer in the storage.
-// layersTarget maps each layer to its checkout on disk.
-// useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOtherLayers(file *internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
- // this is ugly, needs to be indexed
- for layerID, checksums := range layersMetadata {
- source, ok := layersTarget[layerID]
- if !ok {
- continue
- }
- files, found := checksums[file.Digest]
- if !found {
- continue
- }
- for _, candidate := range files {
- // check if it is a valid candidate to dedup file
- if useHardLinks && !canDedupMetadataWithHardLink(file, candidate) {
- continue
- }
-
- found, dstFile, written, err := copyFileFromOtherLayer(file, source, candidate, dirfd, useHardLinks)
- if found && err == nil {
- return found, dstFile, written, err
- }
- }
- }
- // If hard links deduplication was used and it has failed, try again without hard links.
- if useHardLinks {
- return findFileInOtherLayers(file, dirfd, layersMetadata, layersTarget, false)
- }
- return false, nil, 0, nil
-}
-
-func getFileDigest(f *os.File) (digest.Digest, error) {
+func getFileDigest(f *os.File, buf []byte) (digest.Digest, error) {
digester := digest.Canonical.Digester()
- if _, err := io.Copy(digester.Hash(), f); err != nil {
+ if _, err := io.CopyBuffer(digester.Hash(), f, buf); err != nil {
return "", err
}
return digester.Digest(), nil
@@ -408,7 +336,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
// file is the file to look for.
// dirfd is an open fd to the destination checkout.
// useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool, buf []byte) (bool, *os.File, int64, error) {
sourceFile := filepath.Clean(filepath.Join("/", file.Name))
if !strings.HasPrefix(sourceFile, "/usr/") {
// limit host deduplication to files under /usr.
@@ -437,7 +365,7 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
return false, nil, 0, err
}
- checksum, err := getFileDigest(f)
+ checksum, err := getFileDigest(f, buf)
if err != nil {
return false, nil, 0, err
}
@@ -459,7 +387,7 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
dstFile.Close()
return false, nil, 0, err
}
- checksum, err = getFileDigest(f)
+ checksum, err = getFileDigest(f, buf)
if err != nil {
dstFile.Close()
return false, nil, 0, err
@@ -471,6 +399,19 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
return true, dstFile, written, nil
}
+// findFileInOtherLayers finds the specified file in other layers.
+// cache is the layers cache to use.
+// file is the file to look for.
+// dirfd is an open file descriptor to the checkout root directory.
+// useHardLinks defines whether the deduplication can be performed using hard links.
+func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+ target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
+ if err != nil || name == "" {
+ return false, nil, 0, err
+ }
+ return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
+}
+
func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
return nil
@@ -497,18 +438,46 @@ func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOption
return nil
}
-type missingFile struct {
- File *internal.FileMetadata
+type originFile struct {
+ Root string
+ Path string
+ Offset int64
+}
+
+type missingFileChunk struct {
Gap int64
+ Hole bool
+
+ File *internal.FileMetadata
+
+ CompressedSize int64
+ UncompressedSize int64
}
-func (m missingFile) Length() int64 {
- return m.File.EndOffset - m.File.Offset
+type missingPart struct {
+ Hole bool
+ SourceChunk *ImageSourceChunk
+ OriginFile *originFile
+ Chunks []missingFileChunk
}
-type missingChunk struct {
- RawChunk ImageSourceChunk
- Files []missingFile
+func (o *originFile) OpenFile() (io.ReadCloser, error) {
+ srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY, 0)
+ if err != nil {
+ return nil, fmt.Errorf("open source file: %w", err)
+ }
+ defer unix.Close(srcDirfd)
+
+ srcFile, err := openFileUnderRoot(o.Path, srcDirfd, unix.O_RDONLY, 0)
+ if err != nil {
+ return nil, fmt.Errorf("open source file under target rootfs: %w", err)
+ }
+
+ if _, err := srcFile.Seek(o.Offset, 0); err != nil {
+ srcFile.Close()
+ return nil, err
+ }
+ return srcFile, nil
}
// setFileAttrs sets the file attributes for file given metadata
@@ -711,7 +680,7 @@ func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (
newDirfd, err2 := openOrCreateDirUnderRoot(parent, dirfd, 0)
if err2 == nil {
defer newDirfd.Close()
- fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
+ fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
@@ -755,159 +724,367 @@ func openOrCreateDirUnderRoot(name string, dirfd int, mode os.FileMode) (*os.Fil
return nil, err
}
-func (c *chunkedDiffer) createFileFromCompressedStream(dest string, dirfd int, reader io.Reader, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) (err error) {
- file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
- if err != nil {
- return err
+func (c *chunkedDiffer) prepareCompressedStreamToFile(partCompression compressedFileType, from io.Reader, mf *missingFileChunk) (compressedFileType, error) {
+ switch {
+ case partCompression == fileTypeHole:
+ // The entire part is a hole. Do not need to read from a file.
+ c.rawReader = nil
+ return fileTypeHole, nil
+ case mf.Hole:
+ // Only the missing chunk in the requested part refers to a hole.
+ // The received data must be discarded.
+ limitReader := io.LimitReader(from, mf.CompressedSize)
+ _, err := io.CopyBuffer(ioutil.Discard, limitReader, c.copyBuffer)
+ return fileTypeHole, err
+ case partCompression == fileTypeZstdChunked:
+ c.rawReader = io.LimitReader(from, mf.CompressedSize)
+ if c.zstdReader == nil {
+ r, err := zstd.NewReader(c.rawReader)
+ if err != nil {
+ return partCompression, err
+ }
+ c.zstdReader = r
+ } else {
+ if err := c.zstdReader.Reset(c.rawReader); err != nil {
+ return partCompression, err
+ }
+ }
+ case partCompression == fileTypeEstargz:
+ c.rawReader = io.LimitReader(from, mf.CompressedSize)
+ if c.gzipReader == nil {
+ r, err := pgzip.NewReader(c.rawReader)
+ if err != nil {
+ return partCompression, err
+ }
+ c.gzipReader = r
+ } else {
+ if err := c.gzipReader.Reset(c.rawReader); err != nil {
+ return partCompression, err
+ }
+ }
+ case partCompression == fileTypeNoCompression:
+ c.rawReader = io.LimitReader(from, mf.UncompressedSize)
+ default:
+ return partCompression, fmt.Errorf("unknown file type %q", c.fileType)
}
- defer func() {
- err2 := file.Close()
- if err == nil {
- err = err2
+ return partCompression, nil
+}
+
+// hashHole writes SIZE zeros to the specified hasher
+func hashHole(h hash.Hash, size int64, copyBuffer []byte) error {
+ count := int64(len(copyBuffer))
+ if size < count {
+ count = size
+ }
+ for i := int64(0); i < count; i++ {
+ copyBuffer[i] = 0
+ }
+ for size > 0 {
+ count = int64(len(copyBuffer))
+ if size < count {
+ count = size
}
- }()
+ if _, err := h.Write(copyBuffer[:count]); err != nil {
+ return err
+ }
+ size -= count
+ }
+ return nil
+}
- digester := digest.Canonical.Digester()
- checksum := digester.Hash()
- to := io.MultiWriter(file, checksum)
+// appendHole creates a hole with the specified size at the open fd.
+func appendHole(fd int, size int64) error {
+ off, err := unix.Seek(fd, size, unix.SEEK_CUR)
+ if err != nil {
+ return err
+ }
+ // Make sure the file size is changed. It might be the last hole and no other data written afterwards.
+ if err := unix.Ftruncate(fd, off); err != nil {
+ return err
+ }
+ return nil
+}
- switch c.fileType {
+func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileType, destFile *destinationFile, size int64) error {
+ switch compression {
case fileTypeZstdChunked:
- z, err := zstd.NewReader(reader)
- if err != nil {
+ defer c.zstdReader.Reset(nil)
+ if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.zstdReader, size), c.copyBuffer); err != nil {
return err
}
- defer z.Close()
-
- if _, err := io.Copy(to, io.LimitReader(z, metadata.Size)); err != nil {
+ case fileTypeEstargz:
+ defer c.gzipReader.Close()
+ if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.gzipReader, size), c.copyBuffer); err != nil {
return err
}
- if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+ case fileTypeNoCompression:
+ if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.rawReader, size), c.copyBuffer); err != nil {
return err
}
- case fileTypeEstargz:
- if c.gzipReader == nil {
- r, err := pgzip.NewReader(reader)
- if err != nil {
- return err
- }
- c.gzipReader = r
- } else {
- if err := c.gzipReader.Reset(reader); err != nil {
- return err
- }
- }
- defer c.gzipReader.Close()
-
- if _, err := io.Copy(to, io.LimitReader(c.gzipReader, metadata.Size)); err != nil {
+ case fileTypeHole:
+ if err := appendHole(int(destFile.file.Fd()), size); err != nil {
return err
}
- if _, err := io.Copy(ioutil.Discard, reader); err != nil {
+ if err := hashHole(destFile.hash, size, c.copyBuffer); err != nil {
return err
}
default:
return fmt.Errorf("unknown file type %q", c.fileType)
}
+ return nil
+}
+
+type destinationFile struct {
+ dirfd int
+ file *os.File
+ digester digest.Digester
+ hash hash.Hash
+ to io.Writer
+ metadata *internal.FileMetadata
+ options *archive.TarOptions
+}
+
+func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions) (*destinationFile, error) {
+ file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
+ if err != nil {
+ return nil, err
+ }
+
+ digester := digest.Canonical.Digester()
+ hash := digester.Hash()
+ to := io.MultiWriter(file, hash)
+
+ return &destinationFile{
+ file: file,
+ digester: digester,
+ hash: hash,
+ to: to,
+ metadata: metadata,
+ options: options,
+ dirfd: dirfd,
+ }, nil
+}
- manifestChecksum, err := digest.Parse(metadata.Digest)
+func (d *destinationFile) Close() error {
+ manifestChecksum, err := digest.Parse(d.metadata.Digest)
if err != nil {
return err
}
- if digester.Digest() != manifestChecksum {
- return fmt.Errorf("checksum mismatch for %q", dest)
+ if d.digester.Digest() != manifestChecksum {
+ return fmt.Errorf("checksum mismatch for %q (got %q instead of %q)", d.file.Name(), d.digester.Digest(), manifestChecksum)
}
- return setFileAttrs(dirfd, file, mode, metadata, options, false)
+
+ return setFileAttrs(d.dirfd, d.file, os.FileMode(d.metadata.Mode), d.metadata, d.options, false)
}
-func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
- for mc := 0; ; mc++ {
- var part io.ReadCloser
- select {
- case p := <-streams:
- part = p
- case err := <-errs:
- return err
- }
- if part == nil {
- if mc == len(missingChunks) {
- break
+func closeDestinationFiles(files chan *destinationFile, errors chan error) {
+ for f := range files {
+ errors <- f.Close()
+ }
+ close(errors)
+}
+
+func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) (Err error) {
+ var destFile *destinationFile
+
+ filesToClose := make(chan *destinationFile, 3)
+ closeFilesErrors := make(chan error, 2)
+
+ go closeDestinationFiles(filesToClose, closeFilesErrors)
+ defer func() {
+ close(filesToClose)
+ for e := range closeFilesErrors {
+ if e != nil && Err == nil {
+ Err = e
}
- return errors.Errorf("invalid stream returned")
}
- if mc == len(missingChunks) {
- part.Close()
- return errors.Errorf("too many chunks returned")
+ }()
+
+ for _, missingPart := range missingParts {
+ var part io.ReadCloser
+ partCompression := c.fileType
+ switch {
+ case missingPart.Hole:
+ partCompression = fileTypeHole
+ case missingPart.OriginFile != nil:
+ var err error
+ part, err = missingPart.OriginFile.OpenFile()
+ if err != nil {
+ return err
+ }
+ partCompression = fileTypeNoCompression
+ case missingPart.SourceChunk != nil:
+ select {
+ case p := <-streams:
+ part = p
+ case err := <-errs:
+ return err
+ }
+ if part == nil {
+ return errors.Errorf("invalid stream returned")
+ }
+ default:
+ return errors.Errorf("internal error: missing part misses both local and remote data stream")
}
- for _, mf := range missingChunks[mc].Files {
+ for _, mf := range missingPart.Chunks {
if mf.Gap > 0 {
limitReader := io.LimitReader(part, mf.Gap)
- _, err := io.Copy(ioutil.Discard, limitReader)
+ _, err := io.CopyBuffer(ioutil.Discard, limitReader, c.copyBuffer)
if err != nil {
- part.Close()
- return err
+ Err = err
+ goto exit
}
continue
}
- limitReader := io.LimitReader(part, mf.Length())
+ if mf.File.Name == "" {
+ Err = errors.Errorf("file name empty")
+ goto exit
+ }
- if err := c.createFileFromCompressedStream(dest, dirfd, limitReader, os.FileMode(mf.File.Mode), mf.File, options); err != nil {
- part.Close()
- return err
+ compression, err := c.prepareCompressedStreamToFile(partCompression, part, &mf)
+ if err != nil {
+ Err = err
+ goto exit
+ }
+
+ // Open the new file if it is different that what is already
+ // opened
+ if destFile == nil || destFile.metadata.Name != mf.File.Name {
+ var err error
+ if destFile != nil {
+ cleanup:
+ for {
+ select {
+ case err = <-closeFilesErrors:
+ if err != nil {
+ Err = err
+ goto exit
+ }
+ default:
+ break cleanup
+ }
+ }
+ filesToClose <- destFile
+ }
+ destFile, err = openDestinationFile(dirfd, mf.File, options)
+ if err != nil {
+ Err = err
+ goto exit
+ }
+ }
+
+ if err := c.appendCompressedStreamToFile(compression, destFile, mf.UncompressedSize); err != nil {
+ Err = err
+ goto exit
+ }
+ if c.rawReader != nil {
+ if _, err := io.CopyBuffer(ioutil.Discard, c.rawReader, c.copyBuffer); err != nil {
+ Err = err
+ goto exit
+ }
+ }
+ }
+ exit:
+ if part != nil {
+ part.Close()
+ if Err != nil {
+ break
}
}
- part.Close()
}
+
+ if destFile != nil {
+ return destFile.Close()
+ }
+
return nil
}
-func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk {
- if len(missingChunks) <= target {
- return missingChunks
+func mergeMissingChunks(missingParts []missingPart, target int) []missingPart {
+ getGap := func(missingParts []missingPart, i int) int {
+ prev := missingParts[i-1].SourceChunk.Offset + missingParts[i-1].SourceChunk.Length
+ return int(missingParts[i].SourceChunk.Offset - prev)
+ }
+ getCost := func(missingParts []missingPart, i int) int {
+ cost := getGap(missingParts, i)
+ if missingParts[i-1].OriginFile != nil {
+ cost += int(missingParts[i-1].SourceChunk.Length)
+ }
+ if missingParts[i].OriginFile != nil {
+ cost += int(missingParts[i].SourceChunk.Length)
+ }
+ return cost
+ }
+
+ // simple case: merge chunks from the same file.
+ newMissingParts := missingParts[0:1]
+ prevIndex := 0
+ for i := 1; i < len(missingParts); i++ {
+ gap := getGap(missingParts, i)
+ if gap == 0 && missingParts[prevIndex].OriginFile == nil &&
+ missingParts[i].OriginFile == nil &&
+ !missingParts[prevIndex].Hole && !missingParts[i].Hole &&
+ len(missingParts[prevIndex].Chunks) == 1 && len(missingParts[i].Chunks) == 1 &&
+ missingParts[prevIndex].Chunks[0].File.Name == missingParts[i].Chunks[0].File.Name {
+ missingParts[prevIndex].SourceChunk.Length += uint64(gap) + missingParts[i].SourceChunk.Length
+ missingParts[prevIndex].Chunks[0].CompressedSize += missingParts[i].Chunks[0].CompressedSize
+ missingParts[prevIndex].Chunks[0].UncompressedSize += missingParts[i].Chunks[0].UncompressedSize
+ } else {
+ newMissingParts = append(newMissingParts, missingParts[i])
+ prevIndex++
+ }
}
+ missingParts = newMissingParts
- getGap := func(missingChunks []missingChunk, i int) int {
- prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length
- return int(missingChunks[i].RawChunk.Offset - prev)
+ if len(missingParts) <= target {
+ return missingParts
}
// this implementation doesn't account for duplicates, so it could merge
// more than necessary to reach the specified target. Since target itself
// is a heuristic value, it doesn't matter.
- var gaps []int
- for i := 1; i < len(missingChunks); i++ {
- gaps = append(gaps, getGap(missingChunks, i))
+ costs := make([]int, len(missingParts)-1)
+ for i := 1; i < len(missingParts); i++ {
+ costs[i-1] = getCost(missingParts, i)
}
- sort.Ints(gaps)
+ sort.Ints(costs)
- toShrink := len(missingChunks) - target
- targetValue := gaps[toShrink-1]
+ toShrink := len(missingParts) - target
+ if toShrink >= len(costs) {
+ toShrink = len(costs) - 1
+ }
+ targetValue := costs[toShrink]
- newMissingChunks := missingChunks[0:1]
- for i := 1; i < len(missingChunks); i++ {
- gap := getGap(missingChunks, i)
- if gap > targetValue {
- newMissingChunks = append(newMissingChunks, missingChunks[i])
+ newMissingParts = missingParts[0:1]
+ for i := 1; i < len(missingParts); i++ {
+ if getCost(missingParts, i) > targetValue {
+ newMissingParts = append(newMissingParts, missingParts[i])
} else {
- prev := &newMissingChunks[len(newMissingChunks)-1]
- prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length
+ gap := getGap(missingParts, i)
+ prev := &newMissingParts[len(newMissingParts)-1]
+ prev.SourceChunk.Length += uint64(gap) + missingParts[i].SourceChunk.Length
+ prev.Hole = false
+ prev.OriginFile = nil
if gap > 0 {
- gapFile := missingFile{
+ gapFile := missingFileChunk{
Gap: int64(gap),
}
- prev.Files = append(prev.Files, gapFile)
+ prev.Chunks = append(prev.Chunks, gapFile)
}
- prev.Files = append(prev.Files, missingChunks[i].Files...)
+ prev.Chunks = append(prev.Chunks, missingParts[i].Chunks...)
}
}
- return newMissingChunks
+ return newMissingParts
}
-func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChunks []missingChunk, options *archive.TarOptions) error {
+func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) error {
var chunksToRequest []ImageSourceChunk
- for _, c := range missingChunks {
- chunksToRequest = append(chunksToRequest, c.RawChunk)
+ for _, c := range missingParts {
+ if c.OriginFile == nil && !c.Hole {
+ chunksToRequest = append(chunksToRequest, *c.SourceChunk)
+ }
}
// There are some missing files. Prepare a multirange request for the missing chunks.
@@ -921,20 +1098,20 @@ func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingChun
}
if _, ok := err.(ErrBadRequest); ok {
- requested := len(missingChunks)
+ requested := len(missingParts)
// If the server cannot handle at least 64 chunks in a single request, just give up.
if requested < 64 {
return err
}
// Merge more chunks to request
- missingChunks = mergeMissingChunks(missingChunks, requested/2)
+ missingParts = mergeMissingChunks(missingParts, requested/2)
continue
}
return err
}
- if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingChunks, options); err != nil {
+ if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingParts, options); err != nil {
return err
}
return nil
@@ -960,7 +1137,7 @@ func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.File
}
}
- file, err := openFileUnderRoot(name, dirfd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
+ file, err := openFileUnderRoot(base, parentFd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
@@ -1109,7 +1286,69 @@ func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bo
return def
}
+type findAndCopyFileOptions struct {
+ useHardLinks bool
+ enableHostDedup bool
+ ostreeRepos []string
+ options *archive.TarOptions
+}
+
+func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
+ finalizeFile := func(dstFile *os.File) error {
+ if dstFile != nil {
+ defer dstFile.Close()
+ if err := setFileAttrs(dirfd, dstFile, mode, r, copyOptions.options, false); err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+
+ found, dstFile, _, err := findFileInOtherLayers(c.layersCache, r, dirfd, copyOptions.useHardLinks)
+ if err != nil {
+ return false, err
+ }
+ if found {
+ if err := finalizeFile(dstFile); err != nil {
+ return false, err
+ }
+ return true, nil
+ }
+
+ found, dstFile, _, err = findFileInOSTreeRepos(r, copyOptions.ostreeRepos, dirfd, copyOptions.useHardLinks)
+ if err != nil {
+ return false, err
+ }
+ if found {
+ if err := finalizeFile(dstFile); err != nil {
+ return false, err
+ }
+ return true, nil
+ }
+
+ if copyOptions.enableHostDedup {
+ found, dstFile, _, err = findFileOnTheHost(r, dirfd, copyOptions.useHardLinks, c.copyBuffer)
+ if err != nil {
+ return false, err
+ }
+ if found {
+ if err := finalizeFile(dstFile); err != nil {
+ return false, err
+ }
+ return true, nil
+ }
+ }
+ return false, nil
+}
+
func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
+ defer c.layersCache.release()
+ defer func() {
+ if c.zstdReader != nil {
+ c.zstdReader.Close()
+ }
+ }()
+
bigData := map[string][]byte{
bigDataKey: c.manifest,
}
@@ -1137,14 +1376,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
ostreeRepos := strings.Split(storeOpts.PullOptions["ostree_repos"], ":")
// Generate the manifest
- var toc internal.TOC
- if err := json.Unmarshal(c.manifest, &toc); err != nil {
+ toc, err := unmarshalToc(c.manifest)
+ if err != nil {
return output, err
}
whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
- var missingChunks []missingChunk
+ var missingParts []missingPart
mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
if err != nil {
@@ -1170,13 +1409,57 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
}
defer unix.Close(dirfd)
- otherLayersCache := prepareOtherLayersCache(c.layersMetadata)
-
// hardlinks can point to missing files. So create them after all files
// are retrieved
var hardLinks []hardLinkToCreate
- missingChunksSize, totalChunksSize := int64(0), int64(0)
+ missingPartsSize, totalChunksSize := int64(0), int64(0)
+
+ copyOptions := findAndCopyFileOptions{
+ useHardLinks: useHardLinks,
+ enableHostDedup: enableHostDedup,
+ ostreeRepos: ostreeRepos,
+ options: options,
+ }
+
+ type copyFileJob struct {
+ njob int
+ index int
+ mode os.FileMode
+ metadata *internal.FileMetadata
+
+ found bool
+ err error
+ }
+
+ var wg sync.WaitGroup
+
+ copyResults := make([]copyFileJob, len(mergedEntries))
+
+ copyFileJobs := make(chan copyFileJob)
+ defer func() {
+ if copyFileJobs != nil {
+ close(copyFileJobs)
+ }
+ wg.Wait()
+ }()
+
+ for i := 0; i < copyGoRoutines; i++ {
+ wg.Add(1)
+ jobs := copyFileJobs
+
+ go func() {
+ defer wg.Done()
+ for job := range jobs {
+ found, err := c.findAndCopyFile(dirfd, job.metadata, &copyOptions, job.mode)
+ job.err = err
+ job.found = found
+ copyResults[job.njob] = job
+ }
+ }()
+ }
+
+ filesToWaitFor := 0
for i, r := range mergedEntries {
if options.ForceMask != nil {
value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777)
@@ -1272,74 +1555,95 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
totalChunksSize += r.Size
- finalizeFile := func(dstFile *os.File) error {
- if dstFile != nil {
- defer dstFile.Close()
- if err := setFileAttrs(dirfd, dstFile, mode, &r, options, false); err != nil {
- return err
- }
+ if t == tar.TypeReg {
+ index := i
+ njob := filesToWaitFor
+ job := copyFileJob{
+ mode: mode,
+ metadata: &mergedEntries[index],
+ index: index,
+ njob: njob,
}
- return nil
+ copyFileJobs <- job
+ filesToWaitFor++
}
+ }
- found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
- if err != nil {
- return output, err
- }
- if found {
- if err := finalizeFile(dstFile); err != nil {
- return output, err
- }
- continue
- }
+ close(copyFileJobs)
+ copyFileJobs = nil
- found, dstFile, _, err = findFileInOSTreeRepos(&r, ostreeRepos, dirfd, useHardLinks)
- if err != nil {
- return output, err
+ wg.Wait()
+
+ for _, res := range copyResults[:filesToWaitFor] {
+ if res.err != nil {
+ return output, res.err
}
- if found {
- if err := finalizeFile(dstFile); err != nil {
- return output, err
- }
+ // the file was already copied to its destination
+ // so nothing left to do.
+ if res.found {
continue
}
- if enableHostDedup {
- found, dstFile, _, err = findFileOnTheHost(&r, dirfd, useHardLinks)
- if err != nil {
- return output, err
- }
- if found {
- if err := finalizeFile(dstFile); err != nil {
- return output, err
- }
- continue
+ r := &mergedEntries[res.index]
+
+ missingPartsSize += r.Size
+
+ remainingSize := r.Size
+
+ // the file is missing, attempt to find individual chunks.
+ for _, chunk := range r.Chunks {
+ compressedSize := int64(chunk.EndOffset - chunk.Offset)
+ size := remainingSize
+ if chunk.ChunkSize > 0 {
+ size = chunk.ChunkSize
}
- }
+ remainingSize = remainingSize - size
- missingChunksSize += r.Size
- if t == tar.TypeReg {
rawChunk := ImageSourceChunk{
- Offset: uint64(r.Offset),
- Length: uint64(r.EndOffset - r.Offset),
+ Offset: uint64(chunk.Offset),
+ Length: uint64(compressedSize),
}
-
- file := missingFile{
- File: &mergedEntries[i],
+ file := missingFileChunk{
+ File: &mergedEntries[res.index],
+ CompressedSize: compressedSize,
+ UncompressedSize: size,
}
-
- missingChunks = append(missingChunks, missingChunk{
- RawChunk: rawChunk,
- Files: []missingFile{
+ mp := missingPart{
+ SourceChunk: &rawChunk,
+ Chunks: []missingFileChunk{
file,
},
- })
+ }
+
+ switch chunk.ChunkType {
+ case internal.ChunkTypeData:
+ root, path, offset, err := c.layersCache.findChunkInOtherLayers(chunk)
+ if err != nil {
+ return output, err
+ }
+ if offset >= 0 && validateChunkChecksum(chunk, root, path, offset, c.copyBuffer) {
+ missingPartsSize -= size
+ mp.OriginFile = &originFile{
+ Root: root,
+ Path: path,
+ Offset: offset,
+ }
+ }
+ case internal.ChunkTypeZeros:
+ missingPartsSize -= size
+ mp.Hole = true
+ // Mark all chunks belonging to the missing part as holes
+ for i := range mp.Chunks {
+ mp.Chunks[i].Hole = true
+ }
+ }
+ missingParts = append(missingParts, mp)
}
}
// There are some missing files. Prepare a multirange request for the missing chunks.
- if len(missingChunks) > 0 {
- missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks)
- if err := c.retrieveMissingFiles(dest, dirfd, missingChunks, options); err != nil {
+ if len(missingParts) > 0 {
+ missingParts = mergeMissingChunks(missingParts, maxNumberMissingChunks)
+ if err := c.retrieveMissingFiles(dest, dirfd, missingParts, options); err != nil {
return output, err
}
}
@@ -1351,31 +1655,69 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
}
if totalChunksSize > 0 {
- logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize))
+ logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
}
return output, nil
}
+func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
+ // ignore the metadata files for the estargz format.
+ if fileType != fileTypeEstargz {
+ return false
+ }
+ switch e.Name {
+ // ignore the metadata files for the estargz format.
+ case estargz.PrefetchLandmark, estargz.NoPrefetchLandmark, estargz.TOCTarName:
+ return true
+ }
+ return false
+}
+
func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, error) {
- var mergedEntries []internal.FileMetadata
- var prevEntry *internal.FileMetadata
- for _, entry := range entries {
- e := entry
+ countNextChunks := func(start int) int {
+ count := 0
+ for _, e := range entries[start:] {
+ if e.Type != TypeChunk {
+ return count
+ }
+ count++
+ }
+ return count
+ }
- // ignore the metadata files for the estargz format.
- if fileType == fileTypeEstargz && (e.Name == estargz.PrefetchLandmark || e.Name == estargz.NoPrefetchLandmark || e.Name == estargz.TOCTarName) {
+ size := 0
+ for _, entry := range entries {
+ if mustSkipFile(fileType, entry) {
continue
}
+ if entry.Type != TypeChunk {
+ size++
+ }
+ }
+ mergedEntries := make([]internal.FileMetadata, size)
+ m := 0
+ for i := 0; i < len(entries); i++ {
+ e := entries[i]
+ if mustSkipFile(fileType, e) {
+ continue
+ }
if e.Type == TypeChunk {
- if prevEntry == nil || prevEntry.Type != TypeReg {
- return nil, errors.New("chunk type without a regular file")
+ return nil, fmt.Errorf("chunk type without a regular file")
+ }
+
+ if e.Type == TypeReg {
+ nChunks := countNextChunks(i + 1)
+
+ e.Chunks = make([]*internal.FileMetadata, nChunks+1)
+ for j := 0; j <= nChunks; j++ {
+ e.Chunks[j] = &entries[i+j]
+ e.EndOffset = entries[i+j].EndOffset
}
- prevEntry.EndOffset = e.EndOffset
- continue
+ i += nChunks
}
- mergedEntries = append(mergedEntries, e)
- prevEntry = &e
+ mergedEntries[m] = e
+ m++
}
// stargz/estargz doesn't store EndOffset so let's calculate it here
lastOffset := c.tocOffset
@@ -1386,6 +1728,47 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
if mergedEntries[i].Offset != 0 {
lastOffset = mergedEntries[i].Offset
}
+
+ lastChunkOffset := mergedEntries[i].EndOffset
+ for j := len(mergedEntries[i].Chunks) - 1; j >= 0; j-- {
+ mergedEntries[i].Chunks[j].EndOffset = lastChunkOffset
+ mergedEntries[i].Chunks[j].Size = mergedEntries[i].Chunks[j].EndOffset - mergedEntries[i].Chunks[j].Offset
+ lastChunkOffset = mergedEntries[i].Chunks[j].Offset
+ }
}
return mergedEntries, nil
}
+
+// validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
+// same digest as chunk.ChunkDigest
+func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offset int64, copyBuffer []byte) bool {
+ parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
+ if err != nil {
+ return false
+ }
+ defer unix.Close(parentDirfd)
+
+ fd, err := openFileUnderRoot(path, parentDirfd, unix.O_RDONLY, 0)
+ if err != nil {
+ return false
+ }
+ defer fd.Close()
+
+ if _, err := unix.Seek(int(fd.Fd()), offset, 0); err != nil {
+ return false
+ }
+
+ r := io.LimitReader(fd, chunk.ChunkSize)
+ digester := digest.Canonical.Digester()
+
+ if _, err := io.CopyBuffer(digester.Hash(), r, copyBuffer); err != nil {
+ return false
+ }
+
+ digest, err := digest.Parse(chunk.ChunkDigest)
+ if err != nil {
+ return false
+ }
+
+ return digester.Digest() == digest
+}
diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools.go b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
index 83e797599..0abe886eb 100644
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
@@ -82,7 +82,7 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
if len(uidMap) == 1 && uidMap[0].Size == 1 {
uid = uidMap[0].HostID
} else {
- uid, err = toHost(0, uidMap)
+ uid, err = RawToHost(0, uidMap)
if err != nil {
return -1, -1, err
}
@@ -90,7 +90,7 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
if len(gidMap) == 1 && gidMap[0].Size == 1 {
gid = gidMap[0].HostID
} else {
- gid, err = toHost(0, gidMap)
+ gid, err = RawToHost(0, gidMap)
if err != nil {
return -1, -1, err
}
@@ -98,10 +98,14 @@ func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
return uid, gid, nil
}
-// toContainer takes an id mapping, and uses it to translate a
-// host ID to the remapped ID. If no map is provided, then the translation
-// assumes a 1-to-1 mapping and returns the passed in id
-func toContainer(hostID int, idMap []IDMap) (int, error) {
+// RawToContainer takes an id mapping, and uses it to translate a host ID to
+// the remapped ID. If no map is provided, then the translation assumes a
+// 1-to-1 mapping and returns the passed in id.
+//
+// If you wish to map a (uid,gid) combination you should use the corresponding
+// IDMappings methods, which ensure that you are mapping the correct ID against
+// the correct mapping.
+func RawToContainer(hostID int, idMap []IDMap) (int, error) {
if idMap == nil {
return hostID, nil
}
@@ -114,10 +118,14 @@ func toContainer(hostID int, idMap []IDMap) (int, error) {
return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID)
}
-// toHost takes an id mapping and a remapped ID, and translates the
-// ID to the mapped host ID. If no map is provided, then the translation
-// assumes a 1-to-1 mapping and returns the passed in id #
-func toHost(contID int, idMap []IDMap) (int, error) {
+// RawToHost takes an id mapping and a remapped ID, and translates the ID to
+// the mapped host ID. If no map is provided, then the translation assumes a
+// 1-to-1 mapping and returns the passed in id.
+//
+// If you wish to map a (uid,gid) combination you should use the corresponding
+// IDMappings methods, which ensure that you are mapping the correct ID against
+// the correct mapping.
+func RawToHost(contID int, idMap []IDMap) (int, error) {
if idMap == nil {
return contID, nil
}
@@ -187,22 +195,22 @@ func (i *IDMappings) ToHost(pair IDPair) (IDPair, error) {
var err error
var target IDPair
- target.UID, err = toHost(pair.UID, i.uids)
+ target.UID, err = RawToHost(pair.UID, i.uids)
if err != nil {
return target, err
}
- target.GID, err = toHost(pair.GID, i.gids)
+ target.GID, err = RawToHost(pair.GID, i.gids)
return target, err
}
// ToContainer returns the container UID and GID for the host uid and gid
func (i *IDMappings) ToContainer(pair IDPair) (int, int, error) {
- uid, err := toContainer(pair.UID, i.uids)
+ uid, err := RawToContainer(pair.UID, i.uids)
if err != nil {
return -1, -1, err
}
- gid, err := toContainer(pair.GID, i.gids)
+ gid, err := RawToContainer(pair.GID, i.gids)
return uid, gid, err
}
diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go b/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
index e444a1bcc..6e6e3b22b 100644
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools_supported.go
@@ -12,10 +12,14 @@ import (
#cgo LDFLAGS: -l subid
#include <shadow/subid.h>
#include <stdlib.h>
+#include <stdio.h>
const char *Prog = "storage";
+FILE *shadow_logfd = NULL;
+
struct subid_range get_range(struct subid_range *ranges, int i)
{
- return ranges[i];
+ shadow_logfd = stderr;
+ return ranges[i];
}
#if !defined(SUBID_ABI_MAJOR) || (SUBID_ABI_MAJOR < 4)