Merge pull request #3099 from MichaelEischer/check-less-memory

This commit is contained in:
Alexander Neumann 2020-11-16 10:11:25 +01:00 committed by GitHub
commit 5f3b802ee7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 56 deletions

View File

@ -0,0 +1,6 @@
Enhancement: Reduce memory usage of check command
The check command now requires less memory if it is run with the
`--check-unused` option.
https://github.com/restic/restic/pull/3099

View File

@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
} }
} }
chkr := checker.New(repo) chkr := checker.New(repo, opts.CheckUnused)
Verbosef("load indexes\n") Verbosef("load indexes\n")
hints, errs := chkr.LoadIndex(gopts.ctx) hints, errs := chkr.LoadIndex(gopts.ctx)
@ -255,7 +255,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
} }
if opts.CheckUnused { if opts.CheckUnused {
for _, id := range chkr.UnusedBlobs() { for _, id := range chkr.UnusedBlobs(gopts.ctx) {
Verbosef("unused blob %v\n", id) Verbosef("unused blob %v\n", id)
errorsFound = true errorsFound = true
} }

View File

@ -25,31 +25,25 @@ type Checker struct {
packs map[restic.ID]int64 packs map[restic.ID]int64
blobRefs struct { blobRefs struct {
sync.Mutex sync.Mutex
// see flags below M restic.BlobSet
M map[restic.BlobHandle]blobStatus
} }
trackUnused bool
masterIndex *repository.MasterIndex masterIndex *repository.MasterIndex
repo restic.Repository repo restic.Repository
} }
type blobStatus uint8
const (
blobStatusExists blobStatus = 1 << iota
blobStatusReferenced
)
// New returns a new checker which runs on repo. // New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker { func New(repo restic.Repository, trackUnused bool) *Checker {
c := &Checker{ c := &Checker{
packs: make(map[restic.ID]int64), packs: make(map[restic.ID]int64),
masterIndex: repository.NewMasterIndex(), masterIndex: repository.NewMasterIndex(),
repo: repo, repo: repo,
trackUnused: trackUnused,
} }
c.blobRefs.M = make(map[restic.BlobHandle]blobStatus) c.blobRefs.M = restic.NewBlobSet()
return c return c
} }
@ -162,8 +156,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
debug.Log("process blobs") debug.Log("process blobs")
cnt := 0 cnt := 0
for blob := range res.Index.Each(wgCtx) { for blob := range res.Index.Each(wgCtx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
c.blobRefs.M[h] = blobStatusExists
cnt++ cnt++
if _, ok := packToIndex[blob.PackID]; !ok { if _, ok := packToIndex[blob.PackID]; !ok {
@ -529,9 +521,11 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
// even when a file references a tree blob // even when a file references a tree blob
c.blobRefs.Lock() c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob} h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
status := c.blobRefs.M[h] blobReferenced := c.blobRefs.M.Has(h)
// noop if already referenced
c.blobRefs.M.Insert(h)
c.blobRefs.Unlock() c.blobRefs.Unlock()
if (status & blobStatusReferenced) != 0 { if blobReferenced {
continue continue
} }
@ -550,10 +544,6 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
case loadCh <- nextTreeID: case loadCh <- nextTreeID:
outstandingLoadTreeJobs++ outstandingLoadTreeJobs++
loadCh = nil loadCh = nil
c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
c.blobRefs.M[h] |= blobStatusReferenced
c.blobRefs.Unlock()
case j, ok := <-inCh: case j, ok := <-inCh:
if !ok { if !ok {
@ -638,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
debug.Log("checking tree %v", id) debug.Log("checking tree %v", id)
var blobs []restic.ID
for _, node := range tree.Nodes { for _, node := range tree.Nodes {
switch node.Type { switch node.Type {
case "file": case "file":
@ -653,13 +641,28 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)}) errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
continue continue
} }
blobs = append(blobs, blobID)
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob) blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
if !found { if !found {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d size could not be found", node.Name, b)}) debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %v not found in index", node.Name, blobID)})
} }
size += uint64(blobSize) size += uint64(blobSize)
} }
if c.trackUnused {
// loop a second time to keep the locked section as short as possible
c.blobRefs.Lock()
for _, blobID := range node.Content {
if blobID.IsNull() {
continue
}
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
c.blobRefs.M.Insert(h)
debug.Log("blob %v is referenced", blobID)
}
c.blobRefs.Unlock()
}
case "dir": case "dir":
if node.Subtree == nil { if node.Subtree == nil {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)}) errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
@ -683,31 +686,26 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
} }
} }
for _, blobID := range blobs {
c.blobRefs.Lock()
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
if (c.blobRefs.M[h] & blobStatusExists) == 0 {
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
}
c.blobRefs.M[h] |= blobStatusReferenced
debug.Log("blob %v is referenced", blobID)
c.blobRefs.Unlock()
}
return errs return errs
} }
// UnusedBlobs returns all blobs that have never been referenced. // UnusedBlobs returns all blobs that have never been referenced.
func (c *Checker) UnusedBlobs() (blobs restic.BlobHandles) { func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
if !c.trackUnused {
panic("only works when tracking blob references")
}
c.blobRefs.Lock() c.blobRefs.Lock()
defer c.blobRefs.Unlock() defer c.blobRefs.Unlock()
debug.Log("checking %d blobs", len(c.blobRefs.M)) debug.Log("checking %d blobs", len(c.blobRefs.M))
for id, flags := range c.blobRefs.M { ctx, cancel := context.WithCancel(ctx)
if (flags & blobStatusReferenced) == 0 { defer cancel()
debug.Log("blob %v not referenced", id)
blobs = append(blobs, id) for blob := range c.repo.Index().Each(ctx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !c.blobRefs.M.Has(h) {
debug.Log("blob %v not referenced", h)
blobs = append(blobs, h)
} }
} }

View File

@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir) repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs) t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
} }
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle)) test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs) t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
} }
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle)) test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs) t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {
sort.Sort(unusedBlobsBySnapshot) sort.Sort(unusedBlobsBySnapshot)
chkr := checker.New(repo) chkr := checker.New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs) t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -181,7 +181,7 @@ func TestUnreferencedBlobs(t *testing.T) {
test.OKs(t, checkPacks(chkr)) test.OKs(t, checkPacks(chkr))
test.OKs(t, checkStruct(chkr)) test.OKs(t, checkStruct(chkr))
blobs := chkr.UnusedBlobs() blobs := chkr.UnusedBlobs(context.TODO())
sort.Sort(blobs) sort.Sort(blobs)
test.Equals(t, unusedBlobsBySnapshot, blobs) test.Equals(t, unusedBlobsBySnapshot, blobs)
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) == 0 { if len(errs) == 0 {
t.Fatalf("expected errors not found") t.Fatalf("expected errors not found")
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir) repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(hints) == 0 { if len(hints) == 0 {
t.Fatalf("did not get expected checker hints for duplicate packs in indexes") t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
checkRepo := repository.New(beError) checkRepo := repository.New(beError)
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
chkr := checker.New(checkRepo) chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
loadedTrees: restic.NewIDSet(), loadedTrees: restic.NewIDSet(),
} }
chkr := checker.New(checkRepo) chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs) t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
UnblockChannel: make(chan struct{}), UnblockChannel: make(chan struct{}),
} }
chkr := checker.New(delayRepo) chkr := checker.New(delayRepo, false)
go func() { go func() {
<-ctx.Done() <-ctx.Done()
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun
repo := repository.TestOpenLocal(t, repodir) repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo) chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 { if len(errs) > 0 {
defer cleanup() defer cleanup()

View File

@ -9,7 +9,7 @@ import (
// TestCheckRepo runs the checker on repo. // TestCheckRepo runs the checker on repo.
func TestCheckRepo(t testing.TB, repo restic.Repository) { func TestCheckRepo(t testing.TB, repo restic.Repository) {
chkr := New(repo) chkr := New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO()) hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) != 0 { if len(errs) != 0 {
@ -37,7 +37,7 @@ func TestCheckRepo(t testing.TB, repo restic.Repository) {
} }
// unused blobs // unused blobs
blobs := chkr.UnusedBlobs() blobs := chkr.UnusedBlobs(context.TODO())
if len(blobs) > 0 { if len(blobs) > 0 {
t.Errorf("unused blobs found: %v", blobs) t.Errorf("unused blobs found: %v", blobs)
} }

View File

@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
} }
} }
checker := checker.New(repo) checker := checker.New(repo, false)
hints, errs := checker.LoadIndex(context.TODO()) hints, errs := checker.LoadIndex(context.TODO())
for _, h := range hints { for _, h := range hints {
t.Logf("hint: %v\n", h) t.Logf("hint: %v\n", h)