From fc793173a69d2d66ed7384655cd20e81dd0d0016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Friedrich=20Dreyer?= Date: Mon, 10 Jan 2022 14:09:48 +0000 Subject: [PATCH] make archiver id based MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jörn Friedrich Dreyer --- .../unreleased/make-archiver-id-based.md | 5 + internal/http/services/archiver/handler.go | 43 +++++---- .../services/archiver/manager/archiver.go | 94 +++---------------- .../archiver/manager/archiver_test.go | 88 ++--------------- pkg/storage/utils/downloader/downloader.go | 9 +- .../utils/downloader/mock/downloader_mock.go | 6 +- pkg/storage/utils/walker/mock/walker_mock.go | 12 ++- pkg/storage/utils/walker/walker.go | 44 ++++----- 8 files changed, 87 insertions(+), 214 deletions(-) create mode 100644 changelog/unreleased/make-archiver-id-based.md diff --git a/changelog/unreleased/make-archiver-id-based.md b/changelog/unreleased/make-archiver-id-based.md new file mode 100644 index 0000000000..658a7e7a97 --- /dev/null +++ b/changelog/unreleased/make-archiver-id-based.md @@ -0,0 +1,5 @@ +Change: make archiver id based + +The archiver now uses ids to walk the tree instead of paths + +https://github.com/cs3org/reva/pull/2429 \ No newline at end of file diff --git a/internal/http/services/archiver/handler.go b/internal/http/services/archiver/handler.go index ee2c05f111..a660573ced 100644 --- a/internal/http/services/archiver/handler.go +++ b/internal/http/services/archiver/handler.go @@ -119,24 +119,31 @@ func (c *Config) init() { c.GatewaySvc = sharedconf.GetGatewaySVC(c.GatewaySvc) } -func (s *svc) getFiles(ctx context.Context, files, ids []string) ([]string, error) { - if len(files) == 0 && len(ids) == 0 { - return nil, errtypes.BadRequest("file and id lists are both empty") +func (s *svc) getResources(ctx context.Context, paths, ids []string) ([]*provider.ResourceId, error) { + if len(paths) == 0 && len(ids) == 0 { + return nil, errtypes.BadRequest("path and id lists are both empty") } - f := make([]string, 0, len(files)+len(ids)) + resources := make([]*provider.ResourceId, 0, len(paths)+len(ids)) for _, id := range ids { // id is base64 encoded and after decoding has the form : - ref := resourceid.OwnCloudResourceIDUnwrap(id) - if ref == nil { + decodedID := resourceid.OwnCloudResourceIDUnwrap(id) + if decodedID == nil { return nil, errors.New("could not unwrap given file id") } + resources = append(resources, decodedID) + + } + + for _, p := range paths { + // id is base64 encoded and after decoding has the form : + resp, err := s.gtwClient.Stat(ctx, &provider.StatRequest{ Ref: &provider.Reference{ - ResourceId: ref, + Path: p, }, }) @@ -144,27 +151,28 @@ func (s *svc) getFiles(ctx context.Context, files, ids []string) ([]string, erro case err != nil: return nil, err case resp.Status.Code == rpc.Code_CODE_NOT_FOUND: - return nil, errtypes.NotFound(id) + return nil, errtypes.NotFound(p) case resp.Status.Code != rpc.Code_CODE_OK: - return nil, errtypes.InternalError(fmt.Sprintf("error getting stats from %s", id)) + return nil, errtypes.InternalError(fmt.Sprintf("error stating %s", p)) } - f = append(f, resp.Info.Path) + resources = append(resources, resp.Info.Id) } - f = append(f, files...) - // check if all the folders are allowed to be archived - err := s.allAllowed(f) + /* FIXME bring back filtering + err := s.allAllowed(resources) if err != nil { return nil, err } + */ - return f, nil + return resources, nil } // return true if path match with at least with one allowed folder regex +/* func (s *svc) isPathAllowed(path string) bool { for _, reg := range s.allowedFolders { if reg.MatchString(path) { @@ -187,6 +195,7 @@ func (s *svc) allAllowed(paths []string) error { } return nil } +*/ func (s *svc) writeHTTPError(rw http.ResponseWriter, err error) { s.log.Error().Msg(err.Error()) @@ -220,13 +229,13 @@ func (s *svc) Handler() http.Handler { ids = []string{} } - files, err := s.getFiles(ctx, paths, ids) + resources, err := s.getResources(ctx, paths, ids) if err != nil { s.writeHTTPError(rw, err) return } - arch, err := manager.NewArchiver(files, s.walker, s.downloader, manager.Config{ + arch, err := manager.NewArchiver(resources, s.walker, s.downloader, manager.Config{ MaxNumFiles: s.config.MaxNumFiles, MaxSize: s.config.MaxSize, }) @@ -244,7 +253,7 @@ func (s *svc) Handler() http.Handler { archName += ".tar" } - s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files)) + s.log.Debug().Msg("Requested the following resoucres to archive: " + render.Render(resources)) rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archName)) rw.Header().Set("Content-Transfer-Encoding", "binary") diff --git a/internal/http/services/archiver/manager/archiver.go b/internal/http/services/archiver/manager/archiver.go index 7c90b6a459..58a513d122 100644 --- a/internal/http/services/archiver/manager/archiver.go +++ b/internal/http/services/archiver/manager/archiver.go @@ -23,7 +23,6 @@ import ( "archive/zip" "context" "io" - "path" "path/filepath" "time" @@ -40,27 +39,20 @@ type Config struct { // Archiver is the struct able to create an archive type Archiver struct { - files []string - dir string + resources []*provider.ResourceId walker walker.Walker downloader downloader.Downloader config Config } // NewArchiver creates a new archiver able to create an archive containing the files in the list -func NewArchiver(files []string, w walker.Walker, d downloader.Downloader, config Config) (*Archiver, error) { - if len(files) == 0 { +func NewArchiver(r []*provider.ResourceId, w walker.Walker, d downloader.Downloader, config Config) (*Archiver, error) { + if len(r) == 0 { return nil, ErrEmptyList{} } - dir := getDeepestCommonDir(files) - if pathIn(files, dir) { - dir = filepath.Dir(dir) - } - arc := &Archiver{ - dir: dir, - files: files, + resources: r, walker: w, downloader: d, config: config, @@ -68,60 +60,15 @@ func NewArchiver(files []string, w walker.Walker, d downloader.Downloader, confi return arc, nil } -// pathIn verifies that the path `f`is in the `files`list -func pathIn(files []string, f string) bool { - f = filepath.Clean(f) - for _, file := range files { - if filepath.Clean(file) == f { - return true - } - } - return false -} - -func getDeepestCommonDir(files []string) string { - - if len(files) == 0 { - return "" - } - - // find the maximum common substring from left - res := path.Clean(files[0]) + "/" - - for _, file := range files[1:] { - file = path.Clean(file) + "/" - - if len(file) < len(res) { - res, file = file, res - } - - for i := 0; i < len(res); i++ { - if res[i] != file[i] { - res = res[:i] - } - } - - } - - // the common substring could be between two / - inside a file name - for i := len(res) - 1; i >= 0; i-- { - if res[i] == '/' { - res = res[:i+1] - break - } - } - return filepath.Clean(res) -} - // CreateTar creates a tar and write it into the dst Writer func (a *Archiver) CreateTar(ctx context.Context, dst io.Writer) error { w := tar.NewWriter(dst) var filesCount, sizeFiles int64 - for _, root := range a.files { + for _, root := range a.resources { - err := a.walker.Walk(ctx, root, func(path string, info *provider.ResourceInfo, err error) error { + err := a.walker.Walk(ctx, root, func(wd string, info *provider.ResourceInfo, err error) error { if err != nil { return err } @@ -143,15 +90,8 @@ func (a *Archiver) CreateTar(ctx context.Context, dst io.Writer) error { } } - // TODO (gdelmont): remove duplicates if the resources requested overlaps - fileName, err := filepath.Rel(a.dir, path) - - if err != nil { - return err - } - header := tar.Header{ - Name: fileName, + Name: filepath.Join(wd, info.Path), ModTime: time.Unix(int64(info.Mtime.Seconds), 0), } @@ -171,7 +111,7 @@ func (a *Archiver) CreateTar(ctx context.Context, dst io.Writer) error { } if !isDir { - err = a.downloader.Download(ctx, path, w) + err = a.downloader.Download(ctx, info.Id, w) if err != nil { return err } @@ -193,9 +133,9 @@ func (a *Archiver) CreateZip(ctx context.Context, dst io.Writer) error { var filesCount, sizeFiles int64 - for _, root := range a.files { + for _, root := range a.resources { - err := a.walker.Walk(ctx, root, func(path string, info *provider.ResourceInfo, err error) error { + err := a.walker.Walk(ctx, root, func(wd string, info *provider.ResourceInfo, err error) error { if err != nil { return err } @@ -217,18 +157,8 @@ func (a *Archiver) CreateZip(ctx context.Context, dst io.Writer) error { } } - // TODO (gdelmont): remove duplicates if the resources requested overlaps - fileName, err := filepath.Rel(a.dir, path) - if err != nil { - return err - } - - if fileName == "" { - return nil - } - header := zip.FileHeader{ - Name: fileName, + Name: filepath.Join(wd, info.Path), Modified: time.Unix(int64(info.Mtime.Seconds), 0), } @@ -244,7 +174,7 @@ func (a *Archiver) CreateZip(ctx context.Context, dst io.Writer) error { } if !isDir { - err = a.downloader.Download(ctx, path, dst) + err = a.downloader.Download(ctx, info.Id, dst) if err != nil { return err } diff --git a/internal/http/services/archiver/manager/archiver_test.go b/internal/http/services/archiver/manager/archiver_test.go index fd102c5549..c1323edff6 100644 --- a/internal/http/services/archiver/manager/archiver_test.go +++ b/internal/http/services/archiver/manager/archiver_test.go @@ -30,82 +30,12 @@ import ( "strings" "testing" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" downMock "github.com/cs3org/reva/pkg/storage/utils/downloader/mock" walkerMock "github.com/cs3org/reva/pkg/storage/utils/walker/mock" "github.com/cs3org/reva/pkg/test" ) -func TestGetDeepestCommonDir(t *testing.T) { - tests := []struct { - name string - paths []string - expected string - }{ - { - name: "no paths", - paths: []string{}, - expected: "", - }, - { - name: "one path", - paths: []string{"/aa/bb/cc"}, - expected: "/aa/bb/cc", - }, - { - name: "root as common parent", - paths: []string{"/aa/bb/bb", "/bb/cc"}, - expected: "/", - }, - { - name: "common parent", - paths: []string{"/aa/bb/cc", "/aa/bb/dd"}, - expected: "/aa/bb", - }, - { - name: "common parent", - paths: []string{"/aa/bb/cc", "/aa/bb/dd", "/aa/test"}, - expected: "/aa", - }, - { - name: "common parent", - paths: []string{"/aa/bb/cc/", "/aa/bb/dd/", "/aa/test/"}, - expected: "/aa", - }, - { - name: "one is common parent", - paths: []string{"/aa", "/aa/bb/dd", "/aa/test"}, - expected: "/aa", - }, - { - name: "one is common parent", - paths: []string{"/aa/", "/aa/bb/dd/", "/aa/test"}, - expected: "/aa", - }, - { - name: "one is common parent", - paths: []string{"/aa/bb/dd", "/aa/", "/aa/test"}, - expected: "/aa", - }, - { - name: "one is common parent", - paths: []string{"/reva/einstein/test", "/reva/einstein"}, - expected: "/reva/einstein", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - - res := getDeepestCommonDir(tt.paths) - if res != tt.expected { - t.Fatalf("getDeepestCommondDir() failed: paths=%+v expected=%s got=%s", tt.paths, tt.expected, res) - } - - }) - - } -} - func UnTar(dir string, r io.Reader) error { tr := tar.NewReader(r) for { @@ -482,15 +412,15 @@ func TestCreateTar(t *testing.T) { } defer cleanup() - filesAbs := []string{} + resources := []*provider.ResourceId{} for _, f := range tt.files { - filesAbs = append(filesAbs, path.Join(tmpdir, f)) + resources = append(resources, &provider.ResourceId{OpaqueId: path.Join(tmpdir, f)}) } w := walkerMock.NewWalker() d := downMock.NewDownloader() - arch, err := NewArchiver(filesAbs, w, d, tt.config) + arch, err := NewArchiver(resources, w, d, tt.config) if err != nil { t.Fatal(err) } @@ -520,7 +450,7 @@ func TestCreateTar(t *testing.T) { } defer cleanup() if !test.DirEquals(tarTmpDir, expectedTmp) { - t.Fatalf("untar dir different from expected") + t.Fatalf("untar dir %s different from expected %s", tarTmpDir, expectedTmp) } } @@ -930,15 +860,15 @@ func TestCreateZip(t *testing.T) { } defer cleanup() - filesAbs := []string{} + resources := []*provider.ResourceId{} for _, f := range tt.files { - filesAbs = append(filesAbs, path.Join(tmpdir, f)) + resources = append(resources, &provider.ResourceId{OpaqueId: path.Join(tmpdir, f)}) } w := walkerMock.NewWalker() d := downMock.NewDownloader() - arch, err := NewArchiver(filesAbs, w, d, tt.config) + arch, err := NewArchiver(resources, w, d, tt.config) if err != nil { t.Fatal(err) } @@ -968,7 +898,7 @@ func TestCreateZip(t *testing.T) { } defer cleanup() if !test.DirEquals(zipTmpDir, expectedTmp) { - t.Fatalf("unzip dir different from expected") + t.Fatalf("unzip dir %s different from expected %s", zipTmpDir, expectedTmp) } } diff --git a/pkg/storage/utils/downloader/downloader.go b/pkg/storage/utils/downloader/downloader.go index 724174c897..85d57055d7 100644 --- a/pkg/storage/utils/downloader/downloader.go +++ b/pkg/storage/utils/downloader/downloader.go @@ -35,7 +35,7 @@ import ( // Downloader is the interface implemented by the objects that are able to // download a path into a destination Writer type Downloader interface { - Download(context.Context, string, io.Writer) error + Download(context.Context, *provider.ResourceId, io.Writer) error } type revaDownloader struct { @@ -61,10 +61,11 @@ func getDownloadProtocol(protocols []*gateway.FileDownloadProtocol, prot string) } // Download downloads a resource given the path to the dst Writer -func (r *revaDownloader) Download(ctx context.Context, path string, dst io.Writer) error { +func (r *revaDownloader) Download(ctx context.Context, id *provider.ResourceId, dst io.Writer) error { downResp, err := r.gtw.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{ Ref: &provider.Reference{ - Path: path, + ResourceId: id, + Path: ".", }, }) @@ -98,7 +99,7 @@ func (r *revaDownloader) Download(ctx context.Context, path string, dst io.Write if httpRes.StatusCode != http.StatusOK { switch httpRes.StatusCode { case http.StatusNotFound: - return errtypes.NotFound(path) + return errtypes.NotFound(id.String()) default: return errtypes.InternalError(httpRes.Status) } diff --git a/pkg/storage/utils/downloader/mock/downloader_mock.go b/pkg/storage/utils/downloader/mock/downloader_mock.go index 4930dbebd1..11eb9acef3 100644 --- a/pkg/storage/utils/downloader/mock/downloader_mock.go +++ b/pkg/storage/utils/downloader/mock/downloader_mock.go @@ -25,6 +25,8 @@ import ( "os" "github.com/cs3org/reva/pkg/storage/utils/downloader" + + providerv1beta1 "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" ) type mockDownloader struct{} @@ -36,8 +38,8 @@ func NewDownloader() downloader.Downloader { } // Download copies the content of a local file into the dst Writer -func (m *mockDownloader) Download(ctx context.Context, path string, dst io.Writer) error { - f, err := os.Open(path) +func (m *mockDownloader) Download(ctx context.Context, id *providerv1beta1.ResourceId, dst io.Writer) error { + f, err := os.Open(id.OpaqueId) if err != nil { return err } diff --git a/pkg/storage/utils/walker/mock/walker_mock.go b/pkg/storage/utils/walker/mock/walker_mock.go index 725bf79dd9..d537f3428a 100644 --- a/pkg/storage/utils/walker/mock/walker_mock.go +++ b/pkg/storage/utils/walker/mock/walker_mock.go @@ -25,6 +25,7 @@ import ( provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" typesv1beta1 "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" + "github.com/cs3org/reva/pkg/rhttp/router" "github.com/cs3org/reva/pkg/storage/utils/walker" ) @@ -48,7 +49,8 @@ func convertFileInfoToResourceInfo(path string, f fs.FileInfo) *provider.Resourc } return &provider.ResourceInfo{ Type: t, - Path: path, + Path: f.Name(), + Id: &provider.ResourceId{OpaqueId: path}, Size: uint64(f.Size()), Mtime: &typesv1beta1.Timestamp{ Seconds: uint64(f.ModTime().Second()), @@ -58,11 +60,13 @@ func convertFileInfoToResourceInfo(path string, f fs.FileInfo) *provider.Resourc func mockWalkFunc(fn walker.WalkFunc) filepath.WalkFunc { return func(path string, info fs.FileInfo, err error) error { - return fn(path, convertFileInfoToResourceInfo(path, info), err) + _, relativePath := router.ShiftPath(path) + _, relativePath = router.ShiftPath(relativePath) + return fn(filepath.Dir(relativePath), convertFileInfoToResourceInfo(path, info), err) } } // Walk walks into the local file system using the built-in filepath.Walk go function -func (m *mockWalker) Walk(_ context.Context, root string, fn walker.WalkFunc) error { - return filepath.Walk(root, mockWalkFunc(fn)) +func (m *mockWalker) Walk(_ context.Context, root *provider.ResourceId, fn walker.WalkFunc) error { + return filepath.Walk(root.OpaqueId, mockWalkFunc(fn)) } diff --git a/pkg/storage/utils/walker/walker.go b/pkg/storage/utils/walker/walker.go index 2a6059994a..e479cbf065 100644 --- a/pkg/storage/utils/walker/walker.go +++ b/pkg/storage/utils/walker/walker.go @@ -38,12 +38,12 @@ import ( // // The error result returned by the function controls how Walk continues. If the function returns the special value SkipDir, Walk skips the current directory. // Otherwise, if the function returns a non-nil error, Walk stops entirely and returns that error. -type WalkFunc func(path string, info *provider.ResourceInfo, err error) error +type WalkFunc func(wd string, info *provider.ResourceInfo, err error) error // Walker is an interface implemented by objects that are able to walk from a dir rooted into the passed path type Walker interface { // Walk walks the file tree rooted at root, calling fn for each file or folder in the tree, including the root. - Walk(context.Context, string, WalkFunc) error + Walk(ctx context.Context, root *provider.ResourceId, fn WalkFunc) error } type revaWalker struct { @@ -56,14 +56,14 @@ func NewWalker(gtw gateway.GatewayAPIClient) Walker { } // Walk walks the file tree rooted at root, calling fn for each file or folder in the tree, including the root. -func (r *revaWalker) Walk(ctx context.Context, root string, fn WalkFunc) error { +func (r *revaWalker) Walk(ctx context.Context, root *provider.ResourceId, fn WalkFunc) error { info, err := r.stat(ctx, root) if err != nil { - return fn(root, nil, err) + return fn("", nil, err) } - err = r.walkRecursively(ctx, root, info, fn) + err = r.walkRecursively(ctx, "", info, fn) if err == filepath.SkipDir { return nil @@ -72,21 +72,21 @@ func (r *revaWalker) Walk(ctx context.Context, root string, fn WalkFunc) error { return err } -func (r *revaWalker) walkRecursively(ctx context.Context, path string, info *provider.ResourceInfo, fn WalkFunc) error { +func (r *revaWalker) walkRecursively(ctx context.Context, wd string, info *provider.ResourceInfo, fn WalkFunc) error { if info.Type != provider.ResourceType_RESOURCE_TYPE_CONTAINER { - return fn(path, info, nil) + return fn(wd, info, nil) } - list, err := r.readDir(ctx, path) - errFn := fn(path, info, err) + list, err := r.readDir(ctx, info.Id) + errFn := fn(wd, info, err) if err != nil || errFn != nil { return errFn } for _, file := range list { - err = r.walkRecursively(ctx, file.Path, file, fn) + err = r.walkRecursively(ctx, filepath.Join(wd, info.Path), file, fn) if err != nil && (file.Type != provider.ResourceType_RESOURCE_TYPE_CONTAINER || err != filepath.SkipDir) { return err } @@ -95,39 +95,31 @@ func (r *revaWalker) walkRecursively(ctx context.Context, path string, info *pro return nil } -func (r *revaWalker) readDir(ctx context.Context, path string) ([]*provider.ResourceInfo, error) { - resp, err := r.gtw.ListContainer(ctx, &provider.ListContainerRequest{ - Ref: &provider.Reference{ - Path: path, - }, - }) +func (r *revaWalker) readDir(ctx context.Context, id *provider.ResourceId) ([]*provider.ResourceInfo, error) { + resp, err := r.gtw.ListContainer(ctx, &provider.ListContainerRequest{Ref: &provider.Reference{ResourceId: id, Path: "."}}) switch { case err != nil: return nil, err case resp.Status.Code == rpc.Code_CODE_NOT_FOUND: - return nil, errtypes.NotFound(path) + return nil, errtypes.NotFound(id.String()) case resp.Status.Code != rpc.Code_CODE_OK: - return nil, errtypes.InternalError(fmt.Sprintf("error reading dir %s", path)) + return nil, errtypes.InternalError(fmt.Sprintf("error listing container %+v", id)) } return resp.Infos, nil } -func (r *revaWalker) stat(ctx context.Context, path string) (*provider.ResourceInfo, error) { - resp, err := r.gtw.Stat(ctx, &provider.StatRequest{ - Ref: &provider.Reference{ - Path: path, - }, - }) +func (r *revaWalker) stat(ctx context.Context, id *provider.ResourceId) (*provider.ResourceInfo, error) { + resp, err := r.gtw.Stat(ctx, &provider.StatRequest{Ref: &provider.Reference{ResourceId: id, Path: "."}}) switch { case err != nil: return nil, err case resp.Status.Code == rpc.Code_CODE_NOT_FOUND: - return nil, errtypes.NotFound(path) + return nil, errtypes.NotFound(id.String()) case resp.Status.Code != rpc.Code_CODE_OK: - return nil, errtypes.InternalError(fmt.Sprintf("error getting stats from %s", path)) + return nil, errtypes.InternalError(fmt.Sprintf("error stating %+v", id)) } return resp.Info, nil