From b112533812e303b6fcfe447c8374810816daee0d Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Tue, 4 Aug 2020 16:42:38 +0200 Subject: [PATCH] Don't save exact duplicates when merging indexes --- internal/repository/index.go | 27 ++++++++++++++---- internal/repository/master_index_test.go | 35 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/internal/repository/index.go b/internal/repository/index.go index cb1951d5a..216d1afc1 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -464,7 +464,7 @@ func (idx *Index) TreePacks() restic.IDs { } // merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx. -// idx2 is not changed by this method. +// During merging exact duplicates are removed; idx2 is not changed by this method. func (idx *Index) merge(idx2 *Index) error { idx.m.Lock() defer idx.m.Unlock() @@ -476,18 +476,35 @@ func (idx *Index) merge(idx2 *Index) error { } packlen := len(idx.packs) + // first append packs as they might be accessed when looking for duplicates below + idx.packs = append(idx.packs, idx2.packs...) 
+ // copy all index entries of idx2 to idx for typ := range idx2.byType { m2 := &idx2.byType[typ] m := &idx.byType[typ] - m2.foreach(func(entry *indexEntry) bool { - // packIndex is changed as idx2.pack is appended to idx.pack, see below - m.add(entry.id, entry.packIndex+packlen, entry.offset, entry.length) + + // helper func to test if identical entry is contained in idx + hasIdenticalEntry := func(e2 *indexEntry) (found bool) { + m.foreachWithID(e2.id, func(e *indexEntry) { + b := idx.toPackedBlob(e, restic.BlobType(typ)) + b2 := idx2.toPackedBlob(e2, restic.BlobType(typ)) + if b.Length == b2.Length && b.Offset == b2.Offset && b.PackID == b2.PackID { + found = true + } + }) + return found + } + + m2.foreach(func(e2 *indexEntry) bool { + if !hasIdenticalEntry(e2) { + // packIndex needs to be changed as idx2.packs was appended to idx.packs, see above + m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length) + } return true }) } - idx.packs = append(idx.packs, idx2.packs...) idx.treePacks = append(idx.treePacks, idx2.treePacks...) idx.ids = append(idx.ids, idx2.ids...) idx.supersedes = append(idx.supersedes, idx2.supersedes...) 
diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 79ad5483c..5831fba63 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -1,6 +1,7 @@ package repository_test import ( + "context" "fmt" "math/rand" "testing" @@ -167,6 +168,14 @@ func TestMasterMergeFinalIndexes(t *testing.T) { rtest.Equals(t, []*repository.Index{idx1, idx2}, finalIndexes) mIdx.MergeFinalIndexes() + allIndexes := mIdx.All() + rtest.Equals(t, 1, len(allIndexes)) + + blobCount := 0 + for _ = range mIdx.Each(context.TODO()) { + blobCount++ + } + rtest.Equals(t, 2, blobCount) blobs := mIdx.Lookup(idInIdx1, restic.DataBlob) rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) @@ -176,6 +185,32 @@ func TestMasterMergeFinalIndexes(t *testing.T) { blobs = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") + + // merge another index containing identical blobs + idx3 := repository.NewIndex() + idx3.Store(blob1) + idx3.Store(blob2) + + mIdx.Insert(idx3) + finalIndexes = mIdx.FinalizeNotFinalIndexes() + rtest.Equals(t, []*repository.Index{idx3}, finalIndexes) + + mIdx.MergeFinalIndexes() + allIndexes = mIdx.All() + rtest.Equals(t, 1, len(allIndexes)) + + // Index should have same entries as before! + blobs = mIdx.Lookup(idInIdx1, restic.DataBlob) + rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) + + blobs = mIdx.Lookup(idInIdx2, restic.DataBlob) + rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) + + blobCount = 0 + for _ = range mIdx.Each(context.TODO()) { + blobCount++ + } + rtest.Equals(t, 2, blobCount) } func createRandomMasterIndex(rng *rand.Rand, num, size int) (*repository.MasterIndex, restic.ID) {