diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index ccec9b5d9..8d11a9dc4 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -7,7 +7,6 @@ import ( "github.com/spf13/cobra" - "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -146,9 +145,9 @@ func runCat(ctx context.Context, gopts GlobalOptions, args []string) error { return nil case "pack": - h := backend.Handle{Type: restic.PackFile, Name: id.String()} - buf, err := backend.LoadAll(ctx, nil, repo.Backend(), h) - if err != nil { + buf, err := repo.LoadRaw(ctx, restic.PackFile, id) + // allow returning broken pack files + if buf == nil { return err } diff --git a/cmd/restic/cmd_debug.go b/cmd/restic/cmd_debug.go index 3abb9d7eb..93e627f27 100644 --- a/cmd/restic/cmd_debug.go +++ b/cmd/restic/cmd_debug.go @@ -492,8 +492,9 @@ func examinePack(ctx context.Context, opts DebugExamineOptions, repo restic.Repo } Printf(" file size is %v\n", fi.Size) - buf, err := backend.LoadAll(ctx, nil, repo.Backend(), h) - if err != nil { + buf, err := repo.LoadRaw(ctx, restic.PackFile, id) + // also process damaged pack files + if buf == nil { return err } gotID := restic.Hash(buf) diff --git a/internal/backend/test/tests.go b/internal/backend/test/tests.go index 963659fda..4c260d264 100644 --- a/internal/backend/test/tests.go +++ b/internal/backend/test/tests.go @@ -36,6 +36,19 @@ func beTest(ctx context.Context, be backend.Backend, h backend.Handle) (bool, er return err == nil, err } +func LoadAll(ctx context.Context, be backend.Backend, h backend.Handle) ([]byte, error) { + var buf []byte + err := be.Load(ctx, h, 0, 0, func(rd io.Reader) error { + var err error + buf, err = io.ReadAll(rd) + return err + }) + if err != nil { + return nil, err + } + return buf, nil +} + // TestStripPasswordCall tests that the StripPassword method of a factory can be called without crashing. // It does not verify whether passwords are removed correctly func (s *Suite[C]) TestStripPasswordCall(_ *testing.T) { @@ -94,7 +107,7 @@ func (s *Suite[C]) TestConfig(t *testing.T) { var testString = "Config" // create config and read it back - _, err := backend.LoadAll(context.TODO(), nil, b, backend.Handle{Type: backend.ConfigFile}) + _, err := LoadAll(context.TODO(), b, backend.Handle{Type: backend.ConfigFile}) if err == nil { t.Fatalf("did not get expected error for non-existing config") } @@ -110,7 +123,7 @@ func (s *Suite[C]) TestConfig(t *testing.T) { // same config for _, name := range []string{"", "foo", "bar", "0000000000000000000000000000000000000000000000000000000000000000"} { h := backend.Handle{Type: backend.ConfigFile, Name: name} - buf, err := backend.LoadAll(context.TODO(), nil, b, h) + buf, err := LoadAll(context.TODO(), b, h) if err != nil { t.Fatalf("unable to read config with name %q: %+v", name, err) } @@ -519,7 +532,7 @@ func (s *Suite[C]) TestSave(t *testing.T) { err := b.Save(context.TODO(), h, backend.NewByteReader(data, b.Hasher())) test.OK(t, err) - buf, err := backend.LoadAll(context.TODO(), nil, b, h) + buf, err := LoadAll(context.TODO(), b, h) test.OK(t, err) if len(buf) != len(data) { t.Fatalf("number of bytes does not match, want %v, got %v", len(data), len(buf)) @@ -821,7 +834,7 @@ func (s *Suite[C]) TestBackend(t *testing.T) { // test Load() h := backend.Handle{Type: tpe, Name: ts.id} - buf, err := backend.LoadAll(context.TODO(), nil, b, h) + buf, err := LoadAll(context.TODO(), b, h) test.OK(t, err) test.Equals(t, ts.data, string(buf)) diff --git a/internal/backend/utils.go b/internal/backend/utils.go deleted file mode 100644 index 919a1ad92..000000000 --- a/internal/backend/utils.go +++ /dev/null @@ -1,64 +0,0 @@ -package backend - -import ( - "bytes" - "context" - "encoding/hex" - "fmt" - "io" - - "github.com/minio/sha256-simd" - - "github.com/restic/restic/internal/debug" - "github.com/restic/restic/internal/errors" -) - -func verifyContentMatchesName(s string, data []byte) (bool, error) { - if len(s) != hex.EncodedLen(sha256.Size) { - return false, fmt.Errorf("invalid length for ID: %q", s) - } - - b, err := hex.DecodeString(s) - if err != nil { - return false, fmt.Errorf("invalid ID: %s", err) - } - var id [sha256.Size]byte - copy(id[:], b) - - hashed := sha256.Sum256(data) - return id == hashed, nil -} - -// LoadAll reads all data stored in the backend for the handle into the given -// buffer, which is truncated. If the buffer is not large enough or nil, a new -// one is allocated. -func LoadAll(ctx context.Context, buf []byte, be Backend, h Handle) ([]byte, error) { - retriedInvalidData := false - err := be.Load(ctx, h, 0, 0, func(rd io.Reader) error { - // make sure this is idempotent, in case an error occurs this function may be called multiple times! - wr := bytes.NewBuffer(buf[:0]) - _, cerr := io.Copy(wr, rd) - if cerr != nil { - return cerr - } - buf = wr.Bytes() - - // retry loading damaged data only once. If a file fails to download correctly - // the second time, then it is likely corrupted at the backend. Return the data - // to the caller in that case to let it decide what to do with the data. - if !retriedInvalidData && h.Type != ConfigFile { - if matches, err := verifyContentMatchesName(h.Name, buf); err == nil && !matches { - debug.Log("retry loading broken blob %v", h) - retriedInvalidData = true - return errors.Errorf("loadAll(%v): invalid data returned", h) - } - } - return nil - }) - - if err != nil { - return nil, err - } - - return buf, nil -} diff --git a/internal/cache/backend_test.go b/internal/cache/backend_test.go index 68fbb02b3..c8d667854 100644 --- a/internal/cache/backend_test.go +++ b/internal/cache/backend_test.go @@ -12,12 +12,13 @@ import ( "github.com/pkg/errors" "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend/mem" + backendtest "github.com/restic/restic/internal/backend/test" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/test" ) func loadAndCompare(t testing.TB, be backend.Backend, h backend.Handle, data []byte) { - buf, err := backend.LoadAll(context.TODO(), nil, be, h) + buf, err := backendtest.LoadAll(context.TODO(), be, h) if err != nil { t.Fatal(err) } @@ -140,7 +141,7 @@ func TestErrorBackend(t *testing.T) { loadTest := func(wg *sync.WaitGroup, be backend.Backend) { defer wg.Done() - buf, err := backend.LoadAll(context.TODO(), nil, be, h) + buf, err := backendtest.LoadAll(context.TODO(), be, h) if err == testErr { return } @@ -165,38 +166,3 @@ func TestErrorBackend(t *testing.T) { wg.Wait() } - -func TestBackendRemoveBroken(t *testing.T) { - be := mem.New() - c := TestNewCache(t) - - h, data := randomData(5234142) - // save directly in backend - save(t, be, h, data) - - // prime cache with broken copy - broken := append([]byte{}, data...) - broken[0] ^= 0xff - err := c.Save(h, bytes.NewReader(broken)) - test.OK(t, err) - - // loadall retries if broken data was returned - buf, err := backend.LoadAll(context.TODO(), nil, c.Wrap(be), h) - test.OK(t, err) - - if !bytes.Equal(buf, data) { - t.Fatalf("wrong data returned") - } - - // check that the cache now contains the correct data - rd, err := c.load(h, 0, 0) - defer func() { - _ = rd.Close() - }() - test.OK(t, err) - cached, err := io.ReadAll(rd) - test.OK(t, err) - if !bytes.Equal(cached, data) { - t.Fatalf("wrong data cache") - } -} diff --git a/internal/repository/key.go b/internal/repository/key.go index 0604b44df..08f997544 100644 --- a/internal/repository/key.go +++ b/internal/repository/key.go @@ -178,8 +178,7 @@ func SearchKey(ctx context.Context, s *Repository, password string, maxKeys int, // LoadKey loads a key from the backend. func LoadKey(ctx context.Context, s *Repository, id restic.ID) (k *Key, err error) { - h := backend.Handle{Type: restic.KeyFile, Name: id.String()} - data, err := backend.LoadAll(ctx, nil, s.be, h) + data, err := s.LoadRaw(ctx, restic.KeyFile, id) if err != nil { return nil, err } diff --git a/internal/repository/raw.go b/internal/repository/raw.go new file mode 100644 index 000000000..d173908d4 --- /dev/null +++ b/internal/repository/raw.go @@ -0,0 +1,63 @@ +package repository + +import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/restic/restic/internal/backend" + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/restic" +) + +// LoadRaw reads all data stored in the backend for the file with id and filetype t. +// If the backend returns data that does not match the id, then the buffer is returned +// along with an error that is a restic.ErrInvalidData error. +func (r *Repository) LoadRaw(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) { + h := backend.Handle{Type: t, Name: id.String()} + + ctx, cancel := context.WithCancel(ctx) + + var dataErr error + retriedInvalidData := false + err = r.be.Load(ctx, h, 0, 0, func(rd io.Reader) error { + // make sure this is idempotent, in case an error occurs this function may be called multiple times! + wr := bytes.NewBuffer(buf[:0]) + _, cerr := io.Copy(wr, rd) + if cerr != nil { + return cerr + } + buf = wr.Bytes() + + // retry loading damaged data only once. If a file fails to download correctly + // the second time, then it is likely corrupted at the backend. + if h.Type != backend.ConfigFile { + if id != restic.Hash(buf) { + if !retriedInvalidData { + debug.Log("retry loading broken blob %v", h) + retriedInvalidData = true + } else { + // with a canceled context there is not guarantee which error will + // be returned by `be.Load`. + dataErr = fmt.Errorf("loadAll(%v): %w", h, restic.ErrInvalidData) + cancel() + } + return restic.ErrInvalidData + } + } + return nil + }) + + // Return corrupted data to the caller if it is still broken the second time to + // let the caller decide what to do with the data. + if dataErr != nil { + return buf, dataErr + } + + if err != nil { + return nil, err + } + + return buf, nil +} diff --git a/internal/backend/utils_test.go b/internal/repository/raw_test.go similarity index 50% rename from internal/backend/utils_test.go rename to internal/repository/raw_test.go index ad9540e54..42be8827b 100644 --- a/internal/backend/utils_test.go +++ b/internal/repository/raw_test.go @@ -1,4 +1,4 @@ -package backend_test +package repository_test import ( "bytes" @@ -10,6 +10,8 @@ import ( "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend/mem" "github.com/restic/restic/internal/backend/mock" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" ) @@ -19,9 +21,10 @@ const MiB = 1 << 20 func TestLoadAll(t *testing.T) { b := mem.New() - var buf []byte + repo, err := repository.New(b, repository.Options{}) + rtest.OK(t, err) - for i := 0; i < 20; i++ { + for i := 0; i < 5; i++ { data := rtest.Random(23+i, rand.Intn(MiB)+500*KiB) id := restic.Hash(data) @@ -29,7 +32,7 @@ func TestLoadAll(t *testing.T) { err := b.Save(context.TODO(), h, backend.NewByteReader(data, b.Hasher())) rtest.OK(t, err) - buf, err := backend.LoadAll(context.TODO(), buf, b, backend.Handle{Type: backend.PackFile, Name: id.String()}) + buf, err := repo.LoadRaw(context.TODO(), backend.PackFile, id) rtest.OK(t, err) if len(buf) != len(data) { @@ -44,16 +47,6 @@ func TestLoadAll(t *testing.T) { } } -func save(t testing.TB, be backend.Backend, buf []byte) backend.Handle { - id := restic.Hash(buf) - h := backend.Handle{Name: id.String(), Type: backend.PackFile} - err := be.Save(context.TODO(), h, backend.NewByteReader(buf, be.Hasher())) - if err != nil { - t.Fatal(err) - } - return h -} - type quickRetryBackend struct { backend.Backend } @@ -69,6 +62,8 @@ func (be *quickRetryBackend) Load(ctx context.Context, h backend.Handle, length func TestLoadAllBroken(t *testing.T) { b := mock.NewBackend() + repo, err := repository.New(b, repository.Options{}) + rtest.OK(t, err) data := rtest.Random(23, rand.Intn(MiB)+500*KiB) id := restic.Hash(data) @@ -80,70 +75,17 @@ func TestLoadAllBroken(t *testing.T) { } // must fail on first try - _, err := backend.LoadAll(context.TODO(), nil, b, backend.Handle{Type: backend.PackFile, Name: id.String()}) - if err == nil { - t.Fatalf("missing expected error") - } + _, err = repo.LoadRaw(context.TODO(), backend.PackFile, id) + rtest.Assert(t, errors.Is(err, restic.ErrInvalidData), "missing expected ErrInvalidData error, got %v", err) // must return the broken data after a retry be := &quickRetryBackend{Backend: b} - buf, err := backend.LoadAll(context.TODO(), nil, be, backend.Handle{Type: backend.PackFile, Name: id.String()}) + repo, err = repository.New(be, repository.Options{}) rtest.OK(t, err) + buf, err := repo.LoadRaw(context.TODO(), backend.PackFile, id) + rtest.Assert(t, errors.Is(err, restic.ErrInvalidData), "missing expected ErrInvalidData error, got %v", err) if !bytes.Equal(buf, data) { t.Fatalf("wrong data returned") } } - -func TestLoadAllAppend(t *testing.T) { - b := mem.New() - - h1 := save(t, b, []byte("foobar test string")) - randomData := rtest.Random(23, rand.Intn(MiB)+500*KiB) - h2 := save(t, b, randomData) - - var tests = []struct { - handle backend.Handle - buf []byte - want []byte - }{ - { - handle: h1, - buf: nil, - want: []byte("foobar test string"), - }, - { - handle: h1, - buf: []byte("xxx"), - want: []byte("foobar test string"), - }, - { - handle: h2, - buf: nil, - want: randomData, - }, - { - handle: h2, - buf: make([]byte, 0, 200), - want: randomData, - }, - { - handle: h2, - buf: []byte("foobarbaz"), - want: randomData, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - buf, err := backend.LoadAll(context.TODO(), test.buf, b, test.handle) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(buf, test.want) { - t.Errorf("wrong data returned, want %q, got %q", test.want, buf) - } - }) - } -} diff --git a/internal/repository/repair_pack_test.go b/internal/repository/repair_pack_test.go index 078017d21..0d16d251f 100644 --- a/internal/repository/repair_pack_test.go +++ b/internal/repository/repair_pack_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/restic/restic/internal/backend" + backendtest "github.com/restic/restic/internal/backend/test" "github.com/restic/restic/internal/index" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -24,7 +25,7 @@ func listBlobs(repo restic.Repository) restic.BlobSet { } func replaceFile(t *testing.T, repo restic.Repository, h backend.Handle, damage func([]byte) []byte) { - buf, err := backend.LoadAll(context.TODO(), nil, repo.Backend(), h) + buf, err := backendtest.LoadAll(context.TODO(), repo.Backend(), h) test.OK(t, err) buf = damage(buf) test.OK(t, repo.Backend().Remove(context.TODO(), h)) diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 48a56a1fd..d7481117a 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -9,13 +9,13 @@ import ( "math/rand" "os" "path/filepath" - "strings" "testing" "time" "github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend/local" "github.com/restic/restic/internal/crypto" + "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/index" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -259,7 +259,7 @@ func TestRepositoryLoadUnpackedBroken(t *testing.T) { if err == nil { t.Fatal("missing expected error") } - rtest.Assert(t, strings.Contains(err.Error(), "invalid data returned"), "unexpected error: %v", err) + rtest.Assert(t, errors.Is(err, restic.ErrInvalidData), "unexpected error: %v", err) } type damageOnceBackend struct { diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 7a3389e00..5393e0701 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -57,6 +57,11 @@ type Repository interface { // LoadUnpacked loads and decrypts the file with the given type and ID. LoadUnpacked(ctx context.Context, t FileType, id ID) (data []byte, err error) SaveUnpacked(context.Context, FileType, []byte) (ID, error) + + // LoadRaw reads all data stored in the backend for the file with id and filetype t. + // If the backend returns data that does not match the id, then the buffer is returned + // along with an error that is a restic.ErrInvalidData error. + LoadRaw(ctx context.Context, t FileType, id ID) (data []byte, err error) } type FileType = backend.FileType